-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcam_discovery.py
executable file
·66 lines (47 loc) · 1.49 KB
/
cam_discovery.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import urllib2
import re
from urlparse import urlparse
from HTMLParser import HTMLParser
# Parse unsecured cam URLs from insecam.org by scraping its HTML listing pages.
# Is there no other way to do this?
# Fake user agent to avoid an HTTP 403 response; sent with every request.
HEADERS = {'User-Agent' : 'Magic Browser'}
# Scheme and host that every listing URL is built from.
BASE_URL = "https://www.insecam.org"
# Listing path used by get_newest_cam_urls.
NEWEST_PAGE = "/en/bynew/"
# Listing path used by get_best_cam_urls.
MOST_P_PAGE = "/en/byrating/"
# NOTE(review): not referenced in the visible code; presumably the number of
# cams shown per listing page -- TODO confirm or remove.
NUMBER_PER_PAGE = 6
# Query-string prefix; the 1-based page number is appended to it.
PAGINATION_PARAM = "?page="
# define our own html insecam parser
class CamUrlParser(HTMLParser):
    """HTML parser that collects camera image URLs from a listing page.

    Feed markup with ``feed()``. For every ``<img>`` start tag whose ``class``
    attribute contains ``thumbnail-item__img`` and that carries a ``src``,
    the ``src`` is passed through ``parse_url`` and appended to
    ``self.cam_urls`` in document order.
    """

    def __init__(self, *args, **kwargs):
        # HTMLParser is an old-style class on Python 2, so the base
        # initialiser is called directly instead of through super().
        HTMLParser.__init__(self, *args, **kwargs)
        # Per-instance list: a class-level list would be shared by every
        # parser and would leak URLs from one scrape into the next.
        self.cam_urls = []

    def handle_starttag(self, tag, attrs):
        """Record the (normalised) ``src`` of each camera thumbnail image.

        tag: lower-cased tag name supplied by HTMLParser.
        attrs: list of ``(name, value)`` pairs supplied by HTMLParser.
        """
        attrs = dict(attrs)
        if not self.is_cam_img(tag, attrs):
            return
        # A thumbnail without a usable src has nothing to record; skip it
        # rather than aborting the whole parse with a KeyError.
        src = attrs.get("src")
        if src:
            self.cam_urls.append(self.parse_url(src))

    def is_cam_img(self, tag, attrs):
        """Return True if the tag is an ``<img>`` marked as a cam thumbnail.

        tag: tag name.
        attrs: dict mapping attribute names to values.
        """
        if tag != 'img':
            return False
        # A valueless attribute (``<img class>``) is reported as None, so
        # fall back to an empty string before the substring test.
        css_classes = attrs.get("class") or ""
        return "thumbnail-item__img" in css_classes

    # specific url parsing
    def parse_url(self, url):
        """Return ``url`` with ``/mjpg/video.mjpg`` replaced by ``/jpg/image.jpg``.

        URLs that do not contain that path are returned unchanged. A literal
        replacement is used so the ``.`` is not treated as a regex wildcard.
        """
        return url.replace("/mjpg/video.mjpg", "/jpg/image.jpg")
# get x newest cam from insecam
def get_newest_cam_urls(n):
    """Return camera URLs taken from the "newest" listing.

    Delegates to get_cam_urls with NEWEST_PAGE as the listing path; ``n`` is
    forwarded unchanged as the number of URLs requested.
    """
    newest_urls = get_cam_urls(n, NEWEST_PAGE)
    return newest_urls
# get x best cam from insecam
def get_best_cam_urls(n):
    """Return camera URLs taken from the "by rating" listing.

    Delegates to get_cam_urls with MOST_P_PAGE as the listing path; ``n`` is
    forwarded unchanged as the number of URLs requested.
    """
    best_urls = get_cam_urls(n, MOST_P_PAGE)
    return best_urls
# get x cam urls from insecam
def get_cam_urls(n, which):
    """Return up to ``n`` camera URLs scraped from one listing.

    Pages of ``BASE_URL + which`` are requested in order, starting at page 1,
    and fed to a CamUrlParser until ``n`` URLs have been collected or a page
    contributes no new URL (end of listing, or markup no longer matching).

    n: number of URLs wanted; a value <= 0 performs no request.
    which: listing path such as NEWEST_PAGE or MOST_P_PAGE.

    Returns a list of at most ``n`` URLs, in the order they were found.
    Errors raised by urllib2.urlopen (e.g. urllib2.URLError) propagate.
    """
    html_parser = CamUrlParser()
    # Start from an empty list owned by this parser so results from earlier
    # calls can never be carried over into this one.
    html_parser.cam_urls = []
    page = 1
    while len(html_parser.cam_urls) < n:
        found_before = len(html_parser.cam_urls)
        http_request = urllib2.Request(
            url=BASE_URL + which + PAGINATION_PARAM + str(page),
            headers=HEADERS)
        response = urllib2.urlopen(http_request)
        try:
            html_content = response.read()
        finally:
            # Python 2 responses are not context managers; close explicitly.
            response.close()
        html_parser.feed(html_content)
        # A page that adds nothing means there is nothing more to collect;
        # stop instead of requesting pages forever.
        if len(html_parser.cam_urls) == found_before:
            break
        page += 1
    # The last page may overshoot the request; hand back exactly what was
    # asked for.
    return html_parser.cam_urls[:n]