Ok, I got it working. Here's what I have:
import sys
from httplib import InvalidURL
from os import path
from urllib import urlopen
from urlparse import urljoin, urlsplit

from BeautifulSoup import BeautifulSoup
# Directory the downloaded images are written into.  Raw string so the
# backslashes in the Windows path can never be read as escape sequences.
savedir = r'E:\Documents and Settings\Mark-James McDougall\Desktop\DTA'
# Topic page template; %s is filled with the value of the "st" parameter.
url = 'http://bombingscience.com/graffitiforum/index.php?showtopic=4900&st=%s'
# Site root.  Kept for backward compatibility; relative image paths are
# resolved against the page they were found on, the way a browser does.
main_url = 'http://bombingscience.com/'


def resolve_image_url(page_url, src):
    """Return the absolute URL of an <img> ``src`` found on ``page_url``.

    Absolute (http/https), protocol-relative (``//host/...``), root-relative
    (``/a/b``) and page-relative (``a/b``) sources are all handled by
    ``urljoin``.
    """
    return urljoin(page_url, src.strip())


def image_filename(image_url):
    """Return the last path component of ``image_url``.

    The query string and fragment are ignored.  The result is an empty
    string when the path ends in ``/``.
    """
    return urlsplit(image_url)[2].split('/')[-1]


def is_image_response(response):
    """Return True if ``response`` is a successful reply carrying an image.

    ``urllib.urlopen`` does not raise on HTTP errors such as 404, so without
    this check an HTML error page would be written to disk under an image
    file name, producing a file that exists but cannot be displayed.
    """
    status = response.getcode()  # None for non-HTTP URLs
    if status is not None and status != 200:
        return False
    content_type = response.info().get('Content-Type', '') or ''
    return content_type.lower().startswith('image/')


def save_image(image_url, target):
    """Download ``image_url`` to the file ``target``.

    Returns True when the image was written, False when it was skipped or
    the download failed.  Failures are reported and never abort the run.
    """
    try:
        response = urlopen(image_url)
        try:
            if not is_image_response(response):
                print('not an image, skipping: <%s>' % image_url)
                return False
            image = response.read()
        finally:
            response.close()
        with open(target, 'wb') as out:
            out.write(image)
    except (IOError, InvalidURL):
        print('could not open this image: <%s>' % image_url)
        return False
    print('got %s successfully' % image_url)
    return True


def download_page_images(page_url):
    """Fetch ``page_url`` and save every non-ad image it references.

    Exits the program with status 1 if the page itself cannot be opened.
    """
    try:
        page = urlopen(page_url)
        try:
            soup = BeautifulSoup(page)
        finally:
            page.close()
    except (IOError, InvalidURL) as e:
        print('url <%s> did not open: %s' % (page_url, e))
        sys.exit(1)

    for img in soup.findAll('img'):
        src = img.get('src')
        if not src:
            # <img> tag without a usable src attribute: nothing to fetch
            continue
        # if it's from the ad server, let's ignore this image
        if 'adserver' in src:
            print('This looks like an ad, skipping: %s' % src)
            continue
        image_url = resolve_image_url(page_url, src)
        filename = image_filename(image_url)
        if not filename:
            print('no file name in <%s>, skipping' % image_url)
            continue
        save_image(image_url, path.join(savedir, filename))


def main():
    """Walk the topic pages and download the images found on each."""
    # NOTE(review): "st" looks like a post offset rather than a page number,
    # so stepping by 1 may re-fetch overlapping pages -- confirm with the site.
    for i in range(0, 526):
        download_page_images(url % i)


if __name__ == '__main__':
    main()
However, although a bunch of images get downloaded and some of them work properly, others don't display on my PC. The file name and size are there, but there is no image.
Any idea why?