ABSP, Ch 11, Practice Project #2

My solution to Practice Project #2 in Chapter 11 of the excellent book “Automate the Boring Stuff with Python” (ABSP).

#!/usr/bin/env python3
# by lorenzo - 06/10/2017
# download all What-If (https://what-if.xkcd.com) images

import requests, os, bs4, sys

from urllib.parse import urljoin

# Entry page of the site; it shows the most recent What-If article.
BASE_URL = "https://what-if.xkcd.com"
# Name of the sub-folder (created inside the user-chosen location) holding the images.
FOLDER_NAME = "xkcdWhatIf"
# Seconds to wait for the server before giving up, so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 30
# Number of bytes written to disk per chunk while streaming an image.
CHUNK_SIZE = 100000


def resolve_image_url(page_url, src):
    """Return the absolute URL of an image referenced from ``page_url``.

    Some pages give the full web address of their images while others
    only give a path, so ``src`` is resolved against the page it was
    found on. Absolute URLs are returned unchanged.
    """
    return urljoin(page_url, src)


def local_image_path(save_dir, image_url):
    """Return the path inside ``save_dir`` where ``image_url`` is stored.

    The file keeps the name it has on the server (the last component of
    the URL).
    """
    return os.path.join(save_dir, os.path.basename(image_url))


def download_page_images(page_url, save_dir):
    """Download every ``.illustration`` image found on one page.

    Args:
        page_url: address of the page to scan.
        save_dir: existing directory the images are written into.

    Returns:
        The parsed page (a ``BeautifulSoup`` object), so the caller can
        look up the link to the previous page without fetching it again.

    Raises:
        requests.HTTPError: if the page or one of its images cannot be
            fetched (propagated from ``raise_for_status``).
    """
    print("Downloading page %s..." % page_url)
    page_res = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    page_res.raise_for_status()
    soup = bs4.BeautifulSoup(page_res.text, "html.parser")

    illustrations = soup.select(".illustration")
    if not illustrations:
        print("Could not find any comic image.")

    for elem in illustrations:
        src = elem.get("src")
        if not src:
            # An element without a ``src`` attribute has nothing to download.
            continue
        image_url = resolve_image_url(page_url, src)
        print("Downloading image %s..." % image_url)
        image_res = requests.get(image_url, timeout=REQUEST_TIMEOUT)
        image_res.raise_for_status()

        # ``with`` closes the file even if writing a chunk fails.
        with open(local_image_path(save_dir, image_url), "wb") as image_file:
            for chunk in image_res.iter_content(CHUNK_SIZE):
                image_file.write(chunk)

    return soup


def main():
    """Ask for a destination folder and download all What-If images into it.

    Starts at the newest page and follows the "previous" link on each
    page until page 1 has been processed.
    """
    print("Where do you want to save the images?")
    location = input()
    if not os.path.isdir(location):
        print("This is not a valid location!")
        sys.exit(1)

    # Create the image folder inside the chosen location rather than in
    # the current working directory.
    save_dir = os.path.join(location, FOLDER_NAME)
    os.makedirs(save_dir, exist_ok=True)

    url = BASE_URL
    while True:
        soup = download_page_images(url, save_dir)

        # Page 1 is the oldest article: it is processed like any other
        # page, and then the walk stops.
        if url.endswith("/1/"):
            break

        prev_links = soup.select(".nav-prev a[href]")
        if not prev_links:
            # No link to an earlier page: nothing left to visit.
            break
        next_url = urljoin(url, prev_links[0].get("href"))
        if next_url == url:
            # A link pointing back at the same page would loop forever.
            break
        url = next_url

    print("Done.")


if __name__ == "__main__":
    main()

Comments about the code are welcome.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s