import urllib, BeautifulSoup, re, os, time, sys
# Sequential avatar downloader for vkontakte.ru profiles.
#
# Python 2 script (needs 2.6+ for ``with`` / ``except ... as``).  It walks
# http://vkontakte.ru/id<N> for increasing N, extracts the avatar URL from
# each profile page and saves it as photos/id<N>.jpg next to the script.
#
# Files used:
#   lastid.tmp   - resume state, two lines: the ID to restart from and the
#                  total number of images saved so far.  Written when the
#                  run is interrupted with Ctrl-C while a page is fetched.
#   errorlog.txt - one appended line per failed fetch or failed download.

PROFILE_URL = 'http://vkontakte.ru/id'
# Image the site serves for profiles without a photo; never saved.
PLACEHOLDER_AVATAR = 'http://vkontakte.ru/images/question_a.gif'
# Compiled once: pulls the first http:// URL out of the photo link's onclick.
PHOTO_LINK_RE = re.compile(r'http://[\w/.-]+')

# Create and use one and the same directory.  The original created
# 'photos' relative to the current directory but saved into a
# backslash-built path relative to the script, which breaks whenever the
# two differ or sys.argv[0] carries no directory part.
photoDir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), 'photos')
if not os.path.exists(photoDir):
    os.mkdir(photoDir)


def logError(profileId, stage, err):
    """Append '<profileId>: Error in <stage>: <err>' to errorlog.txt.

    The file is opened in append mode and closed before returning.
    """
    with open('errorlog.txt', 'a') as log:
        log.write(str(profileId)+': Error in '+stage+': '+str(err)+'\n')


# Resume from the state of an interrupted run, or start from scratch.
if os.path.exists('lastid.tmp'):
    with open('lastid.tmp', 'r') as tmp:
        startID = int(tmp.readline())
        imgCnt = int(tmp.readline())
else:
    startID = 1
    imgCnt = 0

endID = 9999999
sImg = 0  # images saved during this run (imgCnt holds earlier runs)
# Wall-clock start.  time.clock() measures CPU time on Unix, which barely
# advances while waiting on the network and so gives a meaningless rate.
sTime = time.time()

print('\n'*50)  # crude screen clear
print('Now downloading: http://vkontakte.ru/id'+str(startID))
print('')
print('Total images: '+str(imgCnt))
print('Average speed: calculating...')

# xrange: iterate lazily instead of building a list of ~10 million ints.
for ID in xrange(startID, endID):
    # Reset per-profile state up front so that a failure below can neither
    # reuse the previous profile's URL nor close an undefined/stale handle.
    web = None
    avaUrl = 'None'
    try:
        web = urllib.urlopen(PROFILE_URL+str(ID))
        soup = BeautifulSoup.BeautifulSoup(web)
        # First take the <img> shown in the avatar box ...
        avaUrl = soup.find('div', {'id': 'profile_avatar'}).find('img')['src']
        # ... then prefer the URL embedded in the photo link's onclick
        # handler when that link exists and contains one.
        match = PHOTO_LINK_RE.search(
            soup.find('a', {'id': 'profile_photo_link'})['onclick'])
        avaUrl = match.group()
    except KeyboardInterrupt:
        print('Downloading stopped on http://vkontakte.ru/id'+str(ID))
        print('Will continue on the next start')
        # Store the current ID (it is retried next time) and the running
        # total of saved images.
        with open('lastid.tmp', 'w') as tmp:
            tmp.write(str(ID)+'\n'+str(imgCnt+sImg))
        raw_input('press Enter to exit...')
        sys.exit()
    except (TypeError, AttributeError):
        # Deliberate best-effort: an element was not found (find() returned
        # None) or the regex did not match.  Keep whatever avaUrl holds at
        # this point - 'None' or the avatar-box image.
        pass
    except Exception as err:
        logError(ID, 'urlopen', err)
    finally:
        if web is not None:
            web.close()

    if avaUrl == 'None' or avaUrl == PLACEHOLDER_AVATAR:
        continue  # nothing worth saving for this profile

    imgPath = os.path.join(photoDir, 'id'+str(ID)+'.jpg')
    try:
        urllib.urlretrieve(avaUrl, imgPath)
        sImg += 1
        elapsed = time.time() - sTime
        if elapsed > 0:
            speed = str(round((sImg/elapsed)*60, 1))+' img/min'
        else:
            # Guard against a zero interval instead of dividing by it.
            speed = 'calculating...'
        print('\n'*50)
        print('Now downloading: http://vkontakte.ru/id'+str(ID+1))
        print('')
        print('Image saved: '+imgPath)
        print('Total images: '+str(imgCnt+sImg))
        print('Average speed: '+speed)
    except KeyboardInterrupt:
        # Ctrl-C during a download is not honoured: report it and carry on
        # with the next profile.
        print('Please, try again later. (downloading is not complete)')
    except Exception as err:
        logError(ID, 'urlretrieve', err)