-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcbs.py
32 lines (28 loc) · 1.15 KB
/
cbs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import json
import feedparser
from urllib.parse import urljoin, urlparse
with open("cbs_urls.json", "r") as cbs_file:
cbs_json = json.load(cbs_file)
feeds = [
"https://www.cbsnews.com/latest/rss/main",
"https://www.cbsnews.com/latest/rss/us",
"https://www.cbsnews.com/latest/rss/politics",
"https://www.cbsnews.com/latest/rss/world",
"https://www.cbsnews.com/latest/rss/health",
"https://www.cbsnews.com/latest/rss/moneywatch",
"https://www.cbsnews.com/latest/rss/science",
"https://www.cbsnews.com/latest/rss/technology",
"https://www.cbsnews.com/latest/rss/entertainment",
"https://www.cbsnews.com/latest/rss/evening-news/cbs-news-investigates",
"https://www.cbsnews.com/latest/rss/60-minutes",
"https://www.cbsnews.com/latest/rss/evening-news",
"https://www.cbsnews.com/latest/rss/face-the-nation"
]
for url in feeds:
feed = feedparser.parse(url)
for article in feed.entries:
clean_url = urljoin(article['link'], urlparse(article['link']).path)
if not clean_url in cbs_json:
cbs_json.append(clean_url)
with open("cbs_urls.json", "w") as f:
f.write(json.dumps(cbs_json))