diff --git a/dear_petition/petition/etl/transform.py b/dear_petition/petition/etl/transform.py index 36645834..010f5304 100644 --- a/dear_petition/petition/etl/transform.py +++ b/dear_petition/petition/etl/transform.py @@ -1,7 +1,6 @@ import os from typing import List from django.db import transaction -from django.db.models import Q from dear_petition.petition import models as pm diff --git a/dear_petition/portal/etl/extract.py b/dear_petition/portal/etl/extract.py index 3ff32512..eda03c26 100644 --- a/dear_petition/portal/etl/extract.py +++ b/dear_petition/portal/etl/extract.py @@ -28,6 +28,6 @@ def parse_party_information(soup): """Party Information section""" return PartyInfo( defendant_name=party_info.parse_defendant_name(soup), - defendant_race=party_info.parse_defendant_race(soup), - defendant_sex=party_info.parse_defendant_sex(soup) + defendant_race=party_info.parse_defendant_race(soup) or "", + defendant_sex=party_info.parse_defendant_sex(soup) or "" ) diff --git a/dear_petition/portal/etl/models.py b/dear_petition/portal/etl/models.py index 7db43b68..0585c36b 100644 --- a/dear_petition/portal/etl/models.py +++ b/dear_petition/portal/etl/models.py @@ -6,6 +6,13 @@ from dear_petition.petition import constants +def parse_date(v): + """Parse date strings into datetime.date objects""" + if isinstance(v, str): + return dt.datetime.strptime(v, "%m/%d/%Y").date() + return v + + class CaseSummary(BaseModel): case_number: str county: str @@ -19,13 +26,12 @@ class Charge(BaseModel): degree: str offense_date: Union[dt.date, None] filed_date: Union[dt.date, None] + arrest_date: Union[dt.date, None] - @field_validator("offense_date", "filed_date", mode="before") + @field_validator("offense_date", "filed_date", "arrest_date", mode="before") @classmethod def parse_date(cls, v): - if isinstance(v, str): - return dt.datetime.strptime(v, "%m/%d/%Y") - return v + return parse_date(v) def transform_severity(self): """Attempt to convert Portal's degree to CIPRS severity""" @@ 
-46,9 +52,7 @@ class CaseInfo(BaseModel): @field_validator("case_status_date", mode="before") @classmethod def parse_date(cls, v): - if isinstance(v, str): - return dt.datetime.strptime(v, "%m/%d/%Y") - return v + return parse_date(v) class PartyInfo(BaseModel): @@ -67,9 +71,7 @@ class Disposition(BaseModel): @field_validator("event_date", mode="before") @classmethod def parse_date(cls, v): - if isinstance(v, str): - return dt.datetime.strptime(v, "%m/%d/%Y") - return v + return parse_date(v) def is_dismissed(self) -> bool: return self.criminal_disposition in constants.DISMISSED_DISPOSITION_METHODS @@ -99,5 +101,19 @@ def get_charge_by_number(self, charge_number: int): return charge def transform_offense_date(self) -> dt.date: + if not self.case_info.charges: + return None + offense_dates = [c.offense_date for c in self.case_info.charges] return min(offense_dates).isoformat() + + def transform_arrest_date(self) -> Union[str, None]: + if not self.case_info.charges: + return None + + arrest_dates = [c.arrest_date for c in self.case_info.charges if c.arrest_date is not None] + + if not arrest_dates: + return None + + return min(arrest_dates).isoformat() diff --git a/dear_petition/portal/etl/parsers/case_info.py b/dear_petition/portal/etl/parsers/case_info.py index 347a4ea0..edba69f5 100644 --- a/dear_petition/portal/etl/parsers/case_info.py +++ b/dear_petition/portal/etl/parsers/case_info.py @@ -23,6 +23,7 @@ def parse_case_information(soup): degree=parse_charge_degree(tr=tr) or "", offense_date=parse_charge_offense_date(tr=tr) or None, filed_date=parse_charge_filed_date(tr=tr) or None, + arrest_date=parse_arrest_date(soup) or None, ) ) ci = CaseInfo( @@ -203,3 +204,26 @@ def parse_charge_filed_date(tr): """ # noqa elem = tr.select_one("roa-charge-data-column[ng-if*=FiledDate]") return elem["data-value"] + + +@catch_parse_error +def parse_arrest_date(soup): + """ + Parse arrest date. 
There can be multiple arrest dates for a single charge, but for now just take the first one. + + Sample HTML: +