Skip to content

Commit

Permalink
Parse arrest date from Portal (#488)
Browse files Browse the repository at this point in the history
Also clean up some errors in logs
  • Loading branch information
rebecca-draben authored Aug 8, 2024
1 parent 649ac01 commit e386bdf
Show file tree
Hide file tree
Showing 10 changed files with 101 additions and 19 deletions.
1 change: 0 additions & 1 deletion dear_petition/petition/etl/transform.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os
from typing import List
from django.db import transaction
from django.db.models import Q

from dear_petition.petition import models as pm

Expand Down
4 changes: 2 additions & 2 deletions dear_petition/portal/etl/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ def parse_party_information(soup):
"""Party Information section"""
return PartyInfo(
defendant_name=party_info.parse_defendant_name(soup),
defendant_race=party_info.parse_defendant_race(soup),
defendant_sex=party_info.parse_defendant_sex(soup)
defendant_race=party_info.parse_defendant_race(soup) or "",
defendant_sex=party_info.parse_defendant_sex(soup) or ""
)
36 changes: 26 additions & 10 deletions dear_petition/portal/etl/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@
from dear_petition.petition import constants


def parse_date(v):
    """Turn a ``MM/DD/YYYY`` string into a ``datetime.date``.

    Anything that is not a ``str`` (for example ``None`` or a value that is
    already a date) is handed back untouched. A string that does not match
    the ``%m/%d/%Y`` format makes ``strptime`` raise ``ValueError``.
    """
    # Guard clause: only strings need converting.
    if not isinstance(v, str):
        return v

    parsed = dt.datetime.strptime(v, "%m/%d/%Y")
    # Drop the (always midnight) time component so callers get a plain date.
    return parsed.date()


class CaseSummary(BaseModel):
case_number: str
county: str
Expand All @@ -19,13 +26,12 @@ class Charge(BaseModel):
degree: str
offense_date: Union[dt.date, None]
filed_date: Union[dt.date, None]
arrest_date: Union[dt.date, None]

@field_validator("offense_date", "filed_date", "arrest_date", mode="before")
@classmethod
def parse_date(cls, v):
    """Normalize the charge's date fields before pydantic validates them.

    Applies to ``offense_date``, ``filed_date`` and ``arrest_date``.
    ``MM/DD/YYYY`` strings become ``datetime.date`` objects; any other
    value (e.g. ``None``) is passed through unchanged.
    """
    # Inside a method body the bare name ``parse_date`` resolves to the
    # module-level helper, not to this classmethod, so this is not recursion.
    return parse_date(v)

def transform_severity(self):
"""Attempt to convert Portal's degree to CIPRS severity"""
Expand All @@ -46,9 +52,7 @@ class CaseInfo(BaseModel):
@field_validator("case_status_date", mode="before")
@classmethod
def parse_date(cls, v):
    """Normalize ``case_status_date`` before pydantic validates it.

    ``MM/DD/YYYY`` strings become ``datetime.date`` objects; any other
    value is passed through unchanged.
    """
    # Inside a method body the bare name ``parse_date`` resolves to the
    # module-level helper, not to this classmethod, so this is not recursion.
    return parse_date(v)


class PartyInfo(BaseModel):
Expand All @@ -67,9 +71,7 @@ class Disposition(BaseModel):
@field_validator("event_date", mode="before")
@classmethod
def parse_date(cls, v):
    """Normalize ``event_date`` before pydantic validates it.

    ``MM/DD/YYYY`` strings become ``datetime.date`` objects; any other
    value is passed through unchanged.
    """
    # Inside a method body the bare name ``parse_date`` resolves to the
    # module-level helper, not to this classmethod, so this is not recursion.
    return parse_date(v)

def is_dismissed(self) -> bool:
return self.criminal_disposition in constants.DISMISSED_DISPOSITION_METHODS
Expand Down Expand Up @@ -99,5 +101,19 @@ def get_charge_by_number(self, charge_number: int):
return charge

def transform_offense_date(self) -> Union[str, None]:
    """Return the earliest offense date across all charges as an ISO string.

    Returns:
        The minimum ``offense_date`` formatted with ``date.isoformat()``
        (``YYYY-MM-DD``), or ``None`` when the case has no charges or none
        of the charges carries an offense date.
    """
    charges = self.case_info.charges
    if not charges:
        return None

    # offense_date is optional on a charge; comparing None with a date in
    # min() raises TypeError, so drop the missing values first.
    offense_dates = [c.offense_date for c in charges if c.offense_date is not None]
    if not offense_dates:
        return None

    return min(offense_dates).isoformat()

def transform_arrest_date(self) -> dt.date:
    """Return the earliest known arrest date across all charges.

    Charges without an arrest date are ignored. The result is the string
    produced by ``date.isoformat()`` (note: not a ``dt.date`` object, despite
    the annotation), or ``None`` when there are no charges or no charge has
    an arrest date.
    """
    charges = self.case_info.charges
    known_dates = (
        [charge.arrest_date for charge in charges if charge.arrest_date is not None]
        if charges
        else []
    )
    return min(known_dates).isoformat() if known_dates else None
24 changes: 24 additions & 0 deletions dear_petition/portal/etl/parsers/case_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def parse_case_information(soup):
degree=parse_charge_degree(tr=tr) or "",
offense_date=parse_charge_offense_date(tr=tr) or None,
filed_date=parse_charge_filed_date(tr=tr) or None,
arrest_date = parse_arrest_date(soup) or None,
)
)
ci = CaseInfo(
Expand Down Expand Up @@ -203,3 +204,26 @@ def parse_charge_filed_date(tr):
""" # noqa
elem = tr.select_one("roa-charge-data-column[ng-if*=FiledDate]")
return elem["data-value"]


@catch_parse_error
def parse_arrest_date(soup):
    """
    Parse arrest date.

    There can be multiple arrest dates for a single charge, but for now just
    take the first one. The lookup is done with ``find`` on whatever node is
    passed in, so when the whole document is passed the result is the first
    arrest date found anywhere in it.

    Returns the date text with surrounding whitespace stripped
    (e.g. "10/04/1991"), or None when no arrest date element is present.

    Sample HTML:
    <div class="roa-data ng-scope ng-isolate-scope" label="Date:" ng-if="::arrest.ArrestDate">
    <div class="roa-label roa-inline roa-align-top ng-binding ng-scope" ng-if="::label" ng-bind="::label">
    Date:
    </div>
    <div class="roa-value roa-inline roa-indent" ng-transclude="">
    <span class="ng-binding ng-scope">
    10/04/1991
    </span>
    </div>
    </div>
    """  # noqa
    arrest_date_div = soup.find("div", {"ng-if": "::arrest.ArrestDate"})
    if arrest_date_div is None:
        return None

    # The value lives in a nested span; guard against it being absent so a
    # partially rendered arrest block yields None instead of an
    # AttributeError on ``None.get_text``.
    date_span = arrest_date_div.find("span", class_="ng-binding ng-scope")
    if date_span is None:
        return None

    return date_span.get_text(strip=True)
14 changes: 11 additions & 3 deletions dear_petition/portal/etl/parsers/party_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
@catch_parse_error
def parse_defendant_name(soup):
"""
Parse case type
Parse defendant name
Sample HTML:
Expand Down Expand Up @@ -41,7 +41,11 @@ def parse_defendant_race(soup):
White
</div>
""" # noqa
return soup.find('div', {'ng-if': '::party.Race'}).get_text(strip=True)
race_div = soup.find('div', {'ng-if': '::party.Race'})
if not race_div:
return ""

return race_div.get_text(strip=True)


@catch_parse_error
Expand All @@ -54,4 +58,8 @@ def parse_defendant_sex(soup):
Female
</div>
""" # noqa
return soup.find('div', {'ng-if': '::party.Gender'}).get_text(strip=True)
sex_div = soup.find('div', {'ng-if': '::party.Gender'})
if not sex_div:
return ""

return sex_div.get_text(strip=True)
4 changes: 3 additions & 1 deletion dear_petition/portal/etl/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ def transform_portal_record(source, location=""):
"""Transform eCourts Portal record to CIPRS-looking record."""
portal_record = extract_portal_record(source)
court = portal_record.case_summary.court
sex = portal_record.party_info.defendant_sex
return {
"General": {
"County": portal_record.case_summary.county,
Expand All @@ -16,11 +17,12 @@ def transform_portal_record(source, location=""):
"Case Information": {
"Case Status": portal_record.case_info.case_status,
"Offense Date": portal_record.transform_offense_date(),
"Arrest Date": portal_record.transform_arrest_date(),
},
"Defendant": {
"Name": portal_record.party_info.defendant_name,
"Race": portal_record.party_info.defendant_race,
"Sex": pc.SEX_MAP[portal_record.party_info.defendant_sex],
"Sex": pc.SEX_MAP[sex] if sex else "",
},
"District Court Offense Information": (
transform_offenses(portal_record) if court == "District" else []
Expand Down
29 changes: 28 additions & 1 deletion dear_petition/portal/tests/data/record.html
Original file line number Diff line number Diff line change
Expand Up @@ -2126,6 +2126,34 @@ <h1>
<div>
<!-- Arrests -->
<!-- ngIf: ::charge.Arrests.length -->
<div ng-if="::charge.Arrests.length" class="roa-pad-top ng-scope">
<div>Arrest</div>
<!-- ngRepeat: arrest in ::charge.Arrests -->
<div ng-repeat="arrest in ::charge.Arrests" class="roa-indent-2 ng-scope">
<!-- Arrest Date -->
<div class="roa-data ng-scope ng-isolate-scope" label="Date:" ng-if="::arrest.ArrestDate">
<div class="roa-label roa-inline roa-align-top ng-binding ng-scope" ng-if="::label" ng-bind="::label">
Date:
</div>
<div class="roa-value roa-inline roa-indent" ng-transclude="">
<span class="ng-binding ng-scope">01/03/2001</span>
</div>
</div>
<!-- Agency -->
<div class="roa-data ng-scope ng-isolate-scope" label="Agency:" ng-if="::arrest.AgencyAddress.length ||(arrest.AgencyCode && arrest.AgencyDescription)">
<div class="roa-label roa-inline roa-align-top ng-binding ng-scope" ng-if="::label" ng-bind="::label">
Agency:
</div>
<div class="roa-value roa-inline roa-indent" ng-transclude="">
<div ng-if="::arrest.AgencyCode && arrest.AgencyDescription" class="ng-binding ng-scope">
WAKESO - Wake County Sheriffs Office
</div>
</div>
</div>
</div>
<!-- end ngRepeat: arrest in ::charge.Arrests -->
</div>
<!-- end ngIf: ::charge.Arrests.length -->
<!-- Rearrests -->
<!-- ngIf: ::charge.Rearrests.length -->
<!-- Offense Reports -->
Expand Down Expand Up @@ -4126,4 +4154,3 @@ <h4>Internet Explorer 9 Users</h4>
window.onload = function () { // todo remove this - app cache is deprecated, and not even applicable to an app like ROA TylerUiCacheControl.start({ loaderPath: '', appTitle: 'Register of Actions', }); }
</script>
</body>

1 change: 1 addition & 0 deletions dear_petition/portal/tests/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def test_extract(sample_record):
degree="FNC",
offense_date=dt.date(2001, 1, 1),
filed_date=dt.date(2001, 1, 9),
arrest_date=dt.date(2001, 1, 3),
)
],
),
Expand Down
1 change: 1 addition & 0 deletions dear_petition/portal/tests/transform/test_dispositions.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def charge(self):
degree="FNC",
offense_date=dt.date(1994, 2, 12),
filed_date=dt.date(1994, 2, 14),
arrest_date=dt.date(1994, 4, 15),
)

@pytest.mark.parametrize(
Expand Down
6 changes: 5 additions & 1 deletion dear_petition/portal/tests/transform/test_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@

def test_transform_full_record(sample_record):
expected = {
"Case Information": {"Case Status": "Disposed", "Offense Date": "2001-01-01"},
"Case Information": {
"Case Status": "Disposed",
"Offense Date": "2001-01-01",
"Arrest Date": "2001-01-03"
},
"Defendant": {
"Name": "DOE, JANE EMMA",
"Race": "White",
Expand Down

0 comments on commit e386bdf

Please sign in to comment.