"""
Downloads all XKCD comic strips with respective titles (except 404, guess why)
Author: Manan
Version 2172014
"""
#!/usr/bin/env python
import urllib
import os.path
import sys
import re
def download(directory=None):
    """Download every XKCD comic image into a directory.

    Comic pages are requested in ascending order starting at number 1. For
    each page, the first ``/comics/<name>.<jpg|png|gif>`` path found in the
    HTML is fetched from ``imgs.xkcd.com`` and saved under its original file
    name. Images that already exist in the target directory are not fetched
    again. The run ends at the first page request that fails with an HTTP
    error, which is what happens one past the newest comic.

    Args:
        directory: Folder to save the images into. When ``None`` (the
            default) the user is prompted for it on stdin. An empty string
            means the current working directory. The folder is created if
            it does not exist yet.

    Returns:
        None. Progress is reported on stdout.

    Raises:
        OSError: If the target directory cannot be created.
        IOError: If an image cannot be fetched or written.
    """
    # Function-scope imports keep this block self-contained and let it run on
    # Python 2 (which this file was written for) as well as Python 3.
    import contextlib
    try:
        from urllib.request import urlopen, urlretrieve
        from urllib.error import HTTPError
    except ImportError:  # Python 2
        from urllib import urlretrieve
        from urllib2 import urlopen, HTTPError

    if directory is None:
        try:
            prompt = raw_input  # Python 2 name
        except NameError:
            prompt = input
        directory = prompt("Enter the filepath for the comics to be saved ")

    # os.path.join treats "" as the current directory, so nothing has to be
    # created in that case (and os.makedirs("") would fail).
    if directory and not os.path.isdir(directory):
        os.makedirs(directory)

    # The dot before the extension is escaped, and hyphens/uppercase letters
    # are accepted so that file names containing them are not skipped.
    pattern = re.compile(r"/comics/[A-Za-z0-9_()\-]+\.(?:jpg|png|gif)")

    count = 0
    while True:
        count += 1
        if count == 404:
            # Comic number 404 is not supported; requesting it would be
            # mistaken for the end of the archive, so step over it.
            continue

        try:
            # closing() releases the connection even if read() fails.
            with contextlib.closing(
                    urlopen("http://www.xkcd.com/" + str(count))) as page:
                html = page.read().decode("utf-8", "replace")
        except HTTPError:
            # No page for this number: every published comic has been seen.
            break

        search = pattern.search(html)
        if not search:
            print("No image found for comic " + str(count))
            continue

        matcher = search.group()
        name = os.path.basename(matcher)
        # One path is used for both the existence check and the download.
        target = os.path.join(directory, name)
        if os.path.exists(target):
            print("Comic was already present in the specified directory")
            continue

        # matcher already starts with "/", so the host carries no trailing
        # slash.
        urlretrieve("http://imgs.xkcd.com" + matcher, target)
        print("Downloaded comic " + str(count) + " successfully")
# Run the downloader only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    download()