diff --git a/dear_petition/petition/etl/transform.py b/dear_petition/petition/etl/transform.py index 36645834..010f5304 100644 --- a/dear_petition/petition/etl/transform.py +++ b/dear_petition/petition/etl/transform.py @@ -1,7 +1,6 @@ import os from typing import List from django.db import transaction -from django.db.models import Q from dear_petition.petition import models as pm diff --git a/dear_petition/portal/etl/extract.py b/dear_petition/portal/etl/extract.py index 3ff32512..eda03c26 100644 --- a/dear_petition/portal/etl/extract.py +++ b/dear_petition/portal/etl/extract.py @@ -28,6 +28,6 @@ def parse_party_information(soup): """Party Information section""" return PartyInfo( defendant_name=party_info.parse_defendant_name(soup), - defendant_race=party_info.parse_defendant_race(soup), - defendant_sex=party_info.parse_defendant_sex(soup) + defendant_race=party_info.parse_defendant_race(soup) or "", + defendant_sex=party_info.parse_defendant_sex(soup) or "" ) diff --git a/dear_petition/portal/etl/models.py b/dear_petition/portal/etl/models.py index 7db43b68..0585c36b 100644 --- a/dear_petition/portal/etl/models.py +++ b/dear_petition/portal/etl/models.py @@ -6,6 +6,13 @@ from dear_petition.petition import constants +def parse_date(v): + """Parse date strings into datetime.date objects""" + if isinstance(v, str): + return dt.datetime.strptime(v, "%m/%d/%Y").date() + return v + + class CaseSummary(BaseModel): case_number: str county: str @@ -19,13 +26,12 @@ class Charge(BaseModel): degree: str offense_date: Union[dt.date, None] filed_date: Union[dt.date, None] + arrest_date: Union[dt.date, None] - @field_validator("offense_date", "filed_date", mode="before") + @field_validator("offense_date", "filed_date", "arrest_date", mode="before") @classmethod def parse_date(cls, v): - if isinstance(v, str): - return dt.datetime.strptime(v, "%m/%d/%Y") - return v + return parse_date(v); def transform_severity(self): """Attempt to convert Portal's degree to CIPRS severity""" @@ -46,9 +52,7 @@ class CaseInfo(BaseModel): @field_validator("case_status_date", mode="before") @classmethod def parse_date(cls, v): - if isinstance(v, str): - return dt.datetime.strptime(v, "%m/%d/%Y") - return v + return parse_date(v); class PartyInfo(BaseModel): @@ -67,9 +71,7 @@ class Disposition(BaseModel): @field_validator("event_date", mode="before") @classmethod def parse_date(cls, v): - if isinstance(v, str): - return dt.datetime.strptime(v, "%m/%d/%Y") - return v + return parse_date(v); def is_dismissed(self) -> bool: return self.criminal_disposition in constants.DISMISSED_DISPOSITION_METHODS @@ -99,5 +101,19 @@ def get_charge_by_number(self, charge_number: int): return charge def transform_offense_date(self) -> dt.date: + if not self.case_info.charges: + return None + offense_dates = [c.offense_date for c in self.case_info.charges] return min(offense_dates).isoformat() + + def transform_arrest_date(self) -> dt.date: + if not self.case_info.charges: + return None + + arrest_dates = [c.arrest_date for c in self.case_info.charges if c.arrest_date is not None] + + if not arrest_dates: + return None + + return min(arrest_dates).isoformat() diff --git a/dear_petition/portal/etl/parsers/case_info.py b/dear_petition/portal/etl/parsers/case_info.py index 347a4ea0..edba69f5 100644 --- a/dear_petition/portal/etl/parsers/case_info.py +++ b/dear_petition/portal/etl/parsers/case_info.py @@ -23,6 +23,7 @@ def parse_case_information(soup): degree=parse_charge_degree(tr=tr) or "", offense_date=parse_charge_offense_date(tr=tr) or None, filed_date=parse_charge_filed_date(tr=tr) or None, + arrest_date = parse_arrest_date(soup) or None, ) ) ci = CaseInfo( @@ -203,3 +204,26 @@ def parse_charge_filed_date(tr): """ # noqa elem = tr.select_one("roa-charge-data-column[ng-if*=FiledDate]") return elem["data-value"] + + +@catch_parse_error +def parse_arrest_date(soup): + """ + Parse arrest date. There can be multiple arrest dates for a single charge, but for now just take the first one. + + Sample HTML: +
+
+ Date: +
+
+ + 10/04/1991 + +
+
+ """ + arrest_date_div = soup.find('div', {'ng-if': '::arrest.ArrestDate'}) + if not arrest_date_div: + return None; + return arrest_date_div.find('span', class_='ng-binding ng-scope').get_text(strip=True) diff --git a/dear_petition/portal/etl/parsers/party_info.py b/dear_petition/portal/etl/parsers/party_info.py index c849bbf8..10e6298b 100644 --- a/dear_petition/portal/etl/parsers/party_info.py +++ b/dear_petition/portal/etl/parsers/party_info.py @@ -6,7 +6,7 @@ @catch_parse_error def parse_defendant_name(soup): """ - Parse case type + Parse defendant name Sample HTML: @@ -41,7 +41,11 @@ def parse_defendant_race(soup): White """ # noqa - return soup.find('div', {'ng-if': '::party.Race'}).get_text(strip=True) + race_div = soup.find('div', {'ng-if': '::party.Race'}) + if not race_div: + return "" + + return race_div.get_text(strip=True) @catch_parse_error @@ -54,4 +58,8 @@ def parse_defendant_sex(soup): Female """ # noqa - return soup.find('div', {'ng-if': '::party.Gender'}).get_text(strip=True) + sex_div = soup.find('div', {'ng-if': '::party.Gender'}) + if not sex_div: + return "" + + return sex_div.get_text(strip=True) diff --git a/dear_petition/portal/etl/transform.py b/dear_petition/portal/etl/transform.py index 3e25037f..6f839e33 100644 --- a/dear_petition/portal/etl/transform.py +++ b/dear_petition/portal/etl/transform.py @@ -7,6 +7,7 @@ def transform_portal_record(source, location=""): """Transform eCourts Portal record to CIPRS-looking record.""" portal_record = extract_portal_record(source) court = portal_record.case_summary.court + sex = portal_record.party_info.defendant_sex return { "General": { "County": portal_record.case_summary.county, @@ -16,11 +17,12 @@ def transform_portal_record(source, location=""): "Case Information": { "Case Status": portal_record.case_info.case_status, "Offense Date": portal_record.transform_offense_date(), + "Arrest Date": portal_record.transform_arrest_date(), }, "Defendant": { "Name": portal_record.party_info.defendant_name, "Race": portal_record.party_info.defendant_race, - "Sex": pc.SEX_MAP[portal_record.party_info.defendant_sex], + "Sex": pc.SEX_MAP[sex] if sex else "", }, "District Court Offense Information": ( transform_offenses(portal_record) if court == "District" else [] diff --git a/dear_petition/portal/tests/data/record.html b/dear_petition/portal/tests/data/record.html index 24d2fcbf..5420e8ce 100644 --- a/dear_petition/portal/tests/data/record.html +++ b/dear_petition/portal/tests/data/record.html @@ -2126,6 +2126,34 @@

+
+
Arrest
+ +
+ +
+
+ Date: +
+
+ 01/03/2001 +
+
+ +
+
+ Agency: +
+
+
+ WAKESO - Wake County Sheriffs Office +
+
+
+
+ +
+ @@ -4126,4 +4154,3 @@

Internet Explorer 9 Users

window.onload = function () { // todo remove this - app cache is deprecated, and not even applicable to an app like ROA TylerUiCacheControl.start({ loaderPath: '', appTitle: 'Register of Actions', }); } - \ No newline at end of file diff --git a/dear_petition/portal/tests/test_extract.py b/dear_petition/portal/tests/test_extract.py index c4f7d6ad..77deec98 100644 --- a/dear_petition/portal/tests/test_extract.py +++ b/dear_petition/portal/tests/test_extract.py @@ -28,6 +28,7 @@ def test_extract(sample_record): degree="FNC", offense_date=dt.date(2001, 1, 1), filed_date=dt.date(2001, 1, 9), + arrest_date=dt.date(2001, 1, 3), ) ], ), diff --git a/dear_petition/portal/tests/transform/test_dispositions.py b/dear_petition/portal/tests/transform/test_dispositions.py index 5a9fce8a..a01c7099 100644 --- a/dear_petition/portal/tests/transform/test_dispositions.py +++ b/dear_petition/portal/tests/transform/test_dispositions.py @@ -54,6 +54,7 @@ def charge(self): degree="FNC", offense_date=dt.date(1994, 2, 12), filed_date=dt.date(1994, 2, 14), + arrest_date=dt.date(1994, 4, 15), ) @pytest.mark.parametrize( diff --git a/dear_petition/portal/tests/transform/test_record.py b/dear_petition/portal/tests/transform/test_record.py index 33cac180..6801e38c 100644 --- a/dear_petition/portal/tests/transform/test_record.py +++ b/dear_petition/portal/tests/transform/test_record.py @@ -3,7 +3,11 @@ def test_transform_full_record(sample_record): expected = { - "Case Information": {"Case Status": "Disposed", "Offense Date": "2001-01-01"}, + "Case Information": { + "Case Status": "Disposed", + "Offense Date": "2001-01-01", + "Arrest Date": "2001-01-03" + }, "Defendant": { "Name": "DOE, JANE EMMA", "Race": "White",