Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse arrest date from Portal #488

Merged
merged 1 commit into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion dear_petition/petition/etl/transform.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os
from typing import List
from django.db import transaction
from django.db.models import Q

from dear_petition.petition import models as pm

Expand Down
4 changes: 2 additions & 2 deletions dear_petition/portal/etl/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ def parse_party_information(soup):
"""Party Information section"""
return PartyInfo(
defendant_name=party_info.parse_defendant_name(soup),
defendant_race=party_info.parse_defendant_race(soup),
defendant_sex=party_info.parse_defendant_sex(soup)
defendant_race=party_info.parse_defendant_race(soup) or "",
defendant_sex=party_info.parse_defendant_sex(soup) or ""
)
36 changes: 26 additions & 10 deletions dear_petition/portal/etl/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@
from dear_petition.petition import constants


def parse_date(v):
    """Parse a ``MM/DD/YYYY`` date string into a ``datetime.date``.

    Args:
        v: The raw value handed to a model field. Strings are parsed;
            any other value (e.g. ``None`` or an existing ``date``) is
            returned untouched.

    Returns:
        A ``datetime.date`` for a non-blank string, ``None`` for a blank
        string, otherwise ``v`` itself.

    Raises:
        ValueError: If a non-blank string is not in ``MM/DD/YYYY`` format.
    """
    if not isinstance(v, str):
        return v

    # Scraped text may carry surrounding whitespace; strptime would reject it.
    text = v.strip()
    if not text:
        # A blank value means "no date", not a malformed one.
        return None
    return dt.datetime.strptime(text, "%m/%d/%Y").date()


class CaseSummary(BaseModel):
case_number: str
county: str
Expand All @@ -19,13 +26,12 @@ class Charge(BaseModel):
degree: str
offense_date: Union[dt.date, None]
filed_date: Union[dt.date, None]
arrest_date: Union[dt.date, None]

@field_validator("offense_date", "filed_date", "arrest_date", mode="before")
@classmethod
def parse_date(cls, v):
    """Normalize the charge's date fields before pydantic validates them.

    Delegates to the module-level ``parse_date`` helper. Inside a method
    body the bare name resolves to the module global, not to this
    validator, because class attributes are not in a method's scope.
    """
    return parse_date(v)

def transform_severity(self):
"""Attempt to convert Portal's degree to CIPRS severity"""
Expand All @@ -46,9 +52,7 @@ class CaseInfo(BaseModel):
@field_validator("case_status_date", mode="before")
@classmethod
def parse_date(cls, v):
    """Normalize ``case_status_date`` before pydantic validates it.

    Delegates to the module-level ``parse_date`` helper. Inside a method
    body the bare name resolves to the module global, not to this
    validator, because class attributes are not in a method's scope.
    """
    return parse_date(v)


class PartyInfo(BaseModel):
Expand All @@ -67,9 +71,7 @@ class Disposition(BaseModel):
@field_validator("event_date", mode="before")
@classmethod
def parse_date(cls, v):
    """Normalize ``event_date`` before pydantic validates it.

    Delegates to the module-level ``parse_date`` helper. Inside a method
    body the bare name resolves to the module global, not to this
    validator, because class attributes are not in a method's scope.
    """
    return parse_date(v)

def is_dismissed(self) -> bool:
return self.criminal_disposition in constants.DISMISSED_DISPOSITION_METHODS
Expand Down Expand Up @@ -99,5 +101,19 @@ def get_charge_by_number(self, charge_number: int):
return charge

def transform_offense_date(self) -> Union[str, None]:
    """Return the earliest charge offense date as an ISO-8601 string.

    Returns:
        ``YYYY-MM-DD`` for the earliest known offense date, or ``None``
        when there are no charges or no charge has an offense date.
    """
    charges = self.case_info.charges or []
    # offense_date is optional on a charge; None cannot be compared by
    # min(), so only known dates take part in the comparison.
    earliest = min(
        (c.offense_date for c in charges if c.offense_date is not None),
        default=None,
    )
    return earliest.isoformat() if earliest is not None else None

def transform_arrest_date(self) -> Union[str, None]:
    """Return the earliest charge arrest date as an ISO-8601 string.

    Returns:
        ``YYYY-MM-DD`` for the earliest known arrest date, or ``None``
        when there are no charges or no charge has an arrest date.
    """
    charges = self.case_info.charges or []
    # Charges without an arrest date are ignored; default=None covers the
    # case where nothing is left to compare.
    earliest = min(
        (c.arrest_date for c in charges if c.arrest_date is not None),
        default=None,
    )
    return earliest.isoformat() if earliest is not None else None
24 changes: 24 additions & 0 deletions dear_petition/portal/etl/parsers/case_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def parse_case_information(soup):
degree=parse_charge_degree(tr=tr) or "",
offense_date=parse_charge_offense_date(tr=tr) or None,
filed_date=parse_charge_filed_date(tr=tr) or None,
arrest_date = parse_arrest_date(soup) or None,
)
)
ci = CaseInfo(
Expand Down Expand Up @@ -203,3 +204,26 @@ def parse_charge_filed_date(tr):
""" # noqa
elem = tr.select_one("roa-charge-data-column[ng-if*=FiledDate]")
return elem["data-value"]


@catch_parse_error
def parse_arrest_date(soup):
    """
    Parse arrest date.

    There can be multiple arrest dates for a single charge, but for now just
    take the first one. The lookup runs over whatever element is passed in and
    returns the first match inside it.

    Returns the date text (e.g. "10/04/1991"), or None when no arrest date
    element (or its value span) is present.

    Sample HTML:
        <div class="roa-data ng-scope ng-isolate-scope" label="Date:" ng-if="::arrest.ArrestDate">
            <div class="roa-label roa-inline roa-align-top ng-binding ng-scope" ng-if="::label" ng-bind="::label">
                Date:
            </div>
            <div class="roa-value roa-inline roa-indent" ng-transclude="">
                <span class="ng-binding ng-scope">
                    10/04/1991
                </span>
            </div>
        </div>
    """  # noqa
    arrest_date_div = soup.find('div', {'ng-if': '::arrest.ArrestDate'})
    if arrest_date_div is None:
        return None

    # The value lives in a nested span; guard against it being absent so a
    # partially rendered block yields "no date" instead of an AttributeError.
    value_span = arrest_date_div.find('span', class_='ng-binding ng-scope')
    if value_span is None:
        return None
    return value_span.get_text(strip=True)
14 changes: 11 additions & 3 deletions dear_petition/portal/etl/parsers/party_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
@catch_parse_error
def parse_defendant_name(soup):
"""
Parse case type
Parse defendant name

Sample HTML:

Expand Down Expand Up @@ -41,7 +41,11 @@ def parse_defendant_race(soup):
White
</div>
""" # noqa
return soup.find('div', {'ng-if': '::party.Race'}).get_text(strip=True)
race_div = soup.find('div', {'ng-if': '::party.Race'})
if not race_div:
return ""

return race_div.get_text(strip=True)


@catch_parse_error
Expand All @@ -54,4 +58,8 @@ def parse_defendant_sex(soup):
Female
</div>
""" # noqa
return soup.find('div', {'ng-if': '::party.Gender'}).get_text(strip=True)
sex_div = soup.find('div', {'ng-if': '::party.Gender'})
if not sex_div:
return ""

return sex_div.get_text(strip=True)
4 changes: 3 additions & 1 deletion dear_petition/portal/etl/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ def transform_portal_record(source, location=""):
"""Transform eCourts Portal record to CIPRS-looking record."""
portal_record = extract_portal_record(source)
court = portal_record.case_summary.court
sex = portal_record.party_info.defendant_sex
return {
"General": {
"County": portal_record.case_summary.county,
Expand All @@ -16,11 +17,12 @@ def transform_portal_record(source, location=""):
"Case Information": {
"Case Status": portal_record.case_info.case_status,
"Offense Date": portal_record.transform_offense_date(),
"Arrest Date": portal_record.transform_arrest_date(),
},
"Defendant": {
"Name": portal_record.party_info.defendant_name,
"Race": portal_record.party_info.defendant_race,
"Sex": pc.SEX_MAP[portal_record.party_info.defendant_sex],
"Sex": pc.SEX_MAP[sex] if sex else "",
},
"District Court Offense Information": (
transform_offenses(portal_record) if court == "District" else []
Expand Down
29 changes: 28 additions & 1 deletion dear_petition/portal/tests/data/record.html
Original file line number Diff line number Diff line change
Expand Up @@ -2126,6 +2126,34 @@ <h1>
<div>
<!-- Arrests -->
<!-- ngIf: ::charge.Arrests.length -->
<div ng-if="::charge.Arrests.length" class="roa-pad-top ng-scope">
<div>Arrest</div>
<!-- ngRepeat: arrest in ::charge.Arrests -->
<div ng-repeat="arrest in ::charge.Arrests" class="roa-indent-2 ng-scope">
<!-- Arrest Date -->
<div class="roa-data ng-scope ng-isolate-scope" label="Date:" ng-if="::arrest.ArrestDate">
<div class="roa-label roa-inline roa-align-top ng-binding ng-scope" ng-if="::label" ng-bind="::label">
Date:
</div>
<div class="roa-value roa-inline roa-indent" ng-transclude="">
<span class="ng-binding ng-scope">01/03/2001</span>
</div>
</div>
<!-- Agency -->
<div class="roa-data ng-scope ng-isolate-scope" label="Agency:" ng-if="::arrest.AgencyAddress.length ||(arrest.AgencyCode && arrest.AgencyDescription)">
<div class="roa-label roa-inline roa-align-top ng-binding ng-scope" ng-if="::label" ng-bind="::label">
Agency:
</div>
<div class="roa-value roa-inline roa-indent" ng-transclude="">
<div ng-if="::arrest.AgencyCode && arrest.AgencyDescription" class="ng-binding ng-scope">
WAKESO - Wake County Sheriffs Office
</div>
</div>
</div>
</div>
<!-- end ngRepeat: arrest in ::charge.Arrests -->
</div>
<!-- end ngIf: ::charge.Arrests.length -->
<!-- Rearrests -->
<!-- ngIf: ::charge.Rearrests.length -->
<!-- Offense Reports -->
Expand Down Expand Up @@ -4126,4 +4154,3 @@ <h4>Internet Explorer 9 Users</h4>
window.onload = function () { // todo remove this - app cache is deprecated, and not even applicable to an app like ROA TylerUiCacheControl.start({ loaderPath: '', appTitle: 'Register of Actions', }); }
</script>
</body>

1 change: 1 addition & 0 deletions dear_petition/portal/tests/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def test_extract(sample_record):
degree="FNC",
offense_date=dt.date(2001, 1, 1),
filed_date=dt.date(2001, 1, 9),
arrest_date=dt.date(2001, 1, 3),
)
],
),
Expand Down
1 change: 1 addition & 0 deletions dear_petition/portal/tests/transform/test_dispositions.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def charge(self):
degree="FNC",
offense_date=dt.date(1994, 2, 12),
filed_date=dt.date(1994, 2, 14),
arrest_date=dt.date(1994, 4, 15),
)

@pytest.mark.parametrize(
Expand Down
6 changes: 5 additions & 1 deletion dear_petition/portal/tests/transform/test_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@

def test_transform_full_record(sample_record):
expected = {
"Case Information": {"Case Status": "Disposed", "Offense Date": "2001-01-01"},
"Case Information": {
"Case Status": "Disposed",
"Offense Date": "2001-01-01",
"Arrest Date": "2001-01-03"
},
"Defendant": {
"Name": "DOE, JANE EMMA",
"Race": "White",
Expand Down
Loading