-
Notifications
You must be signed in to change notification settings - Fork 128
/
Copy pathspider_ce.py
74 lines (64 loc) · 2.55 KB
/
spider_ce.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import json
from collections import defaultdict
from typing import List
import scrapy
from covid19br.common.base_spider import BaseCovid19Spider
from covid19br.common.constants import State, ReportQuality
from covid19br.common.models.bulletin_models import (
CountyBulletinModel,
ImportedUndefinedBulletinModel,
)
# Values of the "tipo" field that SpiderCE.group_cases_by_city keeps; they also
# serve as the keys of each per-city dict it builds. Entries with any other
# "tipo" are ignored.
CONFIRMED_CASE_LABEL = "Confirmados"
DEATH_CASE_LABEL = "Óbitos"
# Value of the "municipio" field that SpiderCE.parse reports as an
# ImportedUndefinedBulletinModel instead of a CountyBulletinModel.
IMPORTED_OR_UNDEFINED_LABEL = "SEM INFORMAÇÃO"
class SpiderCE(BaseCovid19Spider):
    """Spider for State.CE backed by the IntegraSUS per-municipality endpoint.

    One request is issued per requested date. The JSON answer is regrouped by
    municipality and turned into one bulletin per municipality, which is then
    registered through ``add_new_bulletin_to_report``.
    """

    state = State.CE
    name = State.CE.value
    information_delay_in_days = 0
    report_qualities = [
        ReportQuality.COUNTY_BULLETINS,
        ReportQuality.UNDEFINED_OR_IMPORTED_CASES,
    ]
    base_url = "https://indicadores.integrasus.saude.ce.gov.br/api/coronavirus/qtd-por-municipio"

    def start_requests(self):
        """Yield one ``scrapy.Request`` for every date in ``self.requested_dates``.

        The request for ``self.today`` hits the bare endpoint; any other date
        is sent as a ``dataFim`` query parameter formatted as ``YYYY-MM-DD``
        (presumably the API's end-date filter -- confirm against the API).
        The date travels to ``parse`` through ``cb_kwargs``.
        """
        for date in self.requested_dates:
            if date == self.today:
                query = ""
            else:
                query = "?dataFim=" + date.strftime("%Y-%m-%d")
            yield scrapy.Request(
                self.base_url + query,
                callback=self.parse,
                cb_kwargs={"date": date},
            )

    def parse(self, response, **kwargs):
        """Convert one API response into bulletins for the requested date.

        Args:
            response: Response whose body is the JSON payload; the URL of the
                originating request is recorded as each bulletin's source.
            **kwargs: Must contain ``date``, the date the request was made for.

        Raises:
            KeyError: If ``date`` is missing from ``kwargs``.
        """
        date = kwargs["date"]
        payload = json.loads(response.body)
        source = response.request.url

        for city, totals in self.group_cases_by_city(payload).items():
            # A label absent for this city yields None rather than an error.
            confirmed = totals.get(CONFIRMED_CASE_LABEL)
            deaths = totals.get(DEATH_CASE_LABEL)

            if city != IMPORTED_OR_UNDEFINED_LABEL:
                bulletin = CountyBulletinModel(
                    date=date,
                    state=self.state,
                    city=city,
                    confirmed_cases=confirmed,
                    deaths=deaths,
                    source=source,
                )
            else:
                # Rows without a known municipality get their own model,
                # which takes no city.
                bulletin = ImportedUndefinedBulletinModel(
                    date=date,
                    state=self.state,
                    confirmed_cases=confirmed,
                    deaths=deaths,
                    source=source,
                )
            self.add_new_bulletin_to_report(bulletin, date)

    @staticmethod
    def group_cases_by_city(data: List[dict]) -> dict:
        """Regroup the flat list of API entries into a per-city mapping.

        Args:
            data: Entries read through their ``"tipo"``, ``"municipio"`` and
                ``"quantidade"`` keys.

        Returns:
            A ``defaultdict(dict)`` mapping each ``"municipio"`` value to a
            dict of ``{tipo: quantidade}``. Only entries whose ``"tipo"`` is
            CONFIRMED_CASE_LABEL or DEATH_CASE_LABEL are kept; when a city
            repeats a ``"tipo"``, the last entry wins.
        """
        tracked_labels = (CONFIRMED_CASE_LABEL, DEATH_CASE_LABEL)
        grouped = defaultdict(dict)
        for entry in data:
            label = entry.get("tipo")
            if label not in tracked_labels:
                continue
            grouped[entry.get("municipio")][label] = entry.get("quantidade")
        return grouped