-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscraper.py
29 lines (19 loc) · 818 Bytes
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import re

import requests
from bs4 import BeautifulSoup
def scrape_area_codes(
    *,
    url: str = 'https://en.wikipedia.org/wiki/List_of_North_American_Numbering_Plan_area_codes',
    table_index: int = 8,
    timeout: float = 10.0,
) -> dict:
    """Scrape telephone area codes grouped by state or province.

    Downloads ``url``, selects the ``table_index``-th ``<table>`` carrying
    the ``wikitable`` class, and reads every row after the first one
    (presumably the header row). In each row the first ``<td>`` is taken as
    the region name and the second ``<td>`` as the list of area codes.

    Args:
        url: Page to download. Defaults to the Wikipedia list of North
            American Numbering Plan area codes.
        table_index: Position of the wanted table among the page's
            ``wikitable`` tables. The default of 8 depends on the current
            page layout and may need adjusting if the page changes.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict mapping the lower-cased region name (with any `` (list)``
        suffix removed) to a list of integer area codes. Codes from
        several rows with the same region name are concatenated in page
        order. Rows with fewer than two cells, or whose second cell holds
        no three-digit number, are skipped.

    Raises:
        requests.RequestException: If the request fails, times out, or
            the server answers with an HTTP error status.
        IndexError: If the page has no ``wikitable`` at ``table_index``.
    """
    # Without a timeout requests may block forever on a stalled connection.
    response = requests.get(url, timeout=timeout)
    # Fail on HTTP errors here instead of parsing an error page below.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.find_all('table', {'class': 'wikitable'})
    try:
        table = tables[table_index]
    except IndexError:
        raise IndexError(
            f'no wikitable at index {table_index}: '
            f'page contains {len(tables)} wikitable table(s)'
        ) from None

    # Area codes are three-digit numbers. Matching them directly tolerates
    # footnote markers, stray whitespace and separators other than ", ".
    code_pattern = re.compile(r'\b\d{3}\b')

    area_codes_dict = {}
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        if len(cells) < 2:
            continue
        state_or_province = cells[0].text.strip().replace(' (list)', '').lower()
        area_codes = [int(code) for code in code_pattern.findall(cells[1].text)]
        if not area_codes:
            continue
        area_codes_dict.setdefault(state_or_province, []).extend(area_codes)
    return area_codes_dict