-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimage_download.py
49 lines (40 loc) · 1.58 KB
/
image_download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
The purpose of this script is to show how to automatically download images from a web site.
As an example, we will use the Pepper&Carrot site, a free (libre) and open-source webcomic.
(c) Sébastien Adam 2020
Website: https://www.sebastienadam.be
diaspora*: https://diasp.de/u/sebastienadam
LinkedIn: https://www.linkedin.com/in/sebastien-adam-be/
"""
from bs4 import BeautifulSoup
from io import BytesIO
from pathlib import Path
from PIL import Image
import requests
# Script body: fetch the Pepper&Carrot home page, follow the link of every
# episode listed in its "homecontent" section, and save each comic page image
# of each episode into the current working directory.

# Home page of the site; it contains one <figure> per episode.
main_url = 'https://www.peppercarrot.com/'

r_main = requests.get(main_url)
if r_main.status_code != 200:
    print(f'Failed to load {main_url}: {r_main.status_code}')
    # quit() is a helper injected by the site module for interactive sessions
    # and may be missing (e.g. with "python -S"); raising SystemExit always
    # works and lets us report the failure through a non-zero exit status.
    raise SystemExit(1)

soup_main = BeautifulSoup(r_main.text, 'html5lib')
soup_homecontent = soup_main.find('div', class_='homecontent')
if soup_homecontent is None:
    # find() returns None when nothing matches; stop with a clear message
    # instead of crashing on the .find_all() call below.
    print(f'No episode list found on {main_url}')
    raise SystemExit(1)

for figure in soup_homecontent.find_all('figure'):
    episode_link = figure.find('a')
    if episode_link is None:
        # A figure without a link cannot lead to an episode page: skip it.
        continue
    episode_url = episode_link['href']
    print(f"{episode_link['title']} : {episode_url}")

    r_episode = requests.get(episode_url)
    if r_episode.status_code != 200:
        # Report the episode that actually failed (not the home page), then
        # carry on with the remaining episodes.
        print(f'Failed to load {episode_url}: {r_episode.status_code}')
        continue

    soup_episode = BeautifulSoup(r_episode.text, 'html5lib')
    for image in soup_episode.find_all('img', class_='comicpage'):
        img_url = image['src']
        print(img_url)

        r_image = requests.get(img_url)
        if r_image.status_code != 200:
            print(f'Failed to load {img_url}: {r_image.status_code}')
            continue

        # Decode the downloaded bytes with Pillow and write the picture to the
        # current directory under the last path component of its URL.
        i = Image.open(BytesIO(r_image.content))
        image_name = Path(img_url).name
        i.save(image_name)