forked from ApoorvGuptaAi/iitk-india-covid-data-parser
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathuttarakhand_parser.py
82 lines (66 loc) · 2.6 KB
/
uttarakhand_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import requests
import dateutil.parser
from bs4 import BeautifulSoup
from hospital import Hospital, Resource, ResourceType
from database_helper import upload_hospitals
from dateutil import parser
from dateutil import tz
# Maps the "IST" abbreviation to the Asia/Kolkata zone; passed as ``tzinfos``
# to the dateutil parser when reading each row's last-updated timestamp.
tzinfos = {"IST": tz.gettz('Asia/Kolkata')}
# Web data source: the page fetched by get_data_from_web(), also used as the
# key of the dict returned by get_uttarakhand_hospitals().
UTTARAKHAND_URL = 'https://covid19.uk.gov.in/bedssummary.aspx'
def _span_text(row_data, span_id):
    """Return the text of the ``<span>`` with id ``span_id`` inside ``row_data``.

    Raises AttributeError when no such span exists, because ``find`` then
    returns ``None``.
    """
    return row_data.find('span', {'id': span_id}).text


def _span_int(row_data, span_id):
    """Return the text of the ``<span>`` with id ``span_id`` converted to int."""
    return int(_span_text(row_data, span_id))


def parse_hospital_row(row_data):
    """Build a ``Hospital`` from one row of the bed-summary table.

    Args:
        row_data: An element supporting ``find`` (one ``<tr>`` of the table
            when called from ``parse_web_data``) that contains the
            per-hospital ``<span>`` fields looked up below.

    Returns:
        A ``Hospital`` for the state "Uttarakhand" carrying three
        ``Resource`` entries (beds without oxygen, beds with oxygen, ICUs)
        and a timezone-aware last-updated timestamp.

    Raises:
        AttributeError: If an expected span is missing from the row.
        ValueError: If a bed count is not an integer or the timestamp text
            cannot be parsed.
    """
    district = _span_text(row_data, 'lblDistrictName')
    hospital_name = _span_text(row_data, 'lblhospitalName')

    # NOTE(review): the span ids below say "occupied", yet these values were
    # originally named "available..." before being handed to Resource.
    # Confirm what the site reports and what Resource expects in this slot.
    gen_beds = _span_int(row_data, 'Lbloccupiedgenralbeds')
    total_gen_beds = _span_int(row_data, 'lbltotGenralbeds')
    oxy_beds = _span_int(row_data, 'lbloccupiedoxygenbeds')
    total_oxy_beds = _span_int(row_data, 'lbltotoxygenbeds')
    icu_beds = _span_int(row_data, 'lbloccupiedicubeds')
    total_icu_beds = _span_int(row_data, 'lbltoticubeds')

    # The page's timestamp carries no zone, so "IST" is appended and resolved
    # through ``tzinfos``. The separating space keeps the suffix from fusing
    # with a trailing alphabetic token (e.g. "PM" -> "PMIST").
    last_updated_at = parser.parse(
        _span_text(row_data, 'lbllastupdated') + " IST",
        tzinfos=tzinfos,
        dayfirst=True)

    resources = [
        Resource(ResourceType.BED_WITHOUT_OXYGEN, "", gen_beds,
                 total_gen_beds),
        Resource(ResourceType.BED_WITH_OXYGEN, "", oxy_beds, total_oxy_beds),
        Resource(ResourceType.ICUS, "", icu_beds, total_icu_beds),
    ]
    return Hospital(hospital_name, "", district, "", "Uttarakhand", "",
                    last_updated_at, resources, "", UTTARAKHAND_URL, 0)
def get_data_from_web(timeout=30):
    """Download the Uttarakhand bed-summary page and parse it.

    Args:
        timeout: Seconds handed to ``requests.get`` as its ``timeout``; without
            one the request may wait on an unresponsive server indefinitely.

    Returns:
        A ``BeautifulSoup`` built from the response text with ``html.parser``.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or an HTTP error status.
    """
    webpage = requests.get(UTTARAKHAND_URL, timeout=timeout)
    # Fail here on 4xx/5xx rather than later while parsing an error page.
    webpage.raise_for_status()
    return BeautifulSoup(webpage.text, 'html.parser')
def parse_web_data(soup):
    """Convert the bed-summary table in ``soup`` into a list of hospitals.

    Looks up the table with id ``grdhospitalbeds``, takes every ``<tr>`` under
    its ``<tbody>``, and runs each through ``parse_hospital_row``.

    Args:
        soup: Parsed page, as returned by ``get_data_from_web``.

    Returns:
        A list with one ``parse_hospital_row`` result per table row, in page
        order.
    """
    beds_table = soup.find('table', {'id': 'grdhospitalbeds'})
    table_body = beds_table.find('tbody')
    return [parse_hospital_row(tr) for tr in table_body.find_all('tr')]
def get_uttarakhand_hospitals():
    """Fetch and parse the Uttarakhand page in one step.

    Returns:
        A single-entry dict mapping ``UTTARAKHAND_URL`` to the list of
        hospitals parsed from that page.
    """
    return {UTTARAKHAND_URL: parse_web_data(get_data_from_web())}
def main():
    """Scrape the page, then print the hospital count and each timestamp."""
    hospital_data = get_uttarakhand_hospitals()
    hospitals = hospital_data[UTTARAKHAND_URL]
    print(len(hospitals))
    for entry in hospitals:
        print(entry.last_updated)
    # The upload step is commented out in this script:
    # upload_hospitals(hospital_data)


if __name__ == "__main__":
    main()