Skip to content

Commit

Permalink
Parse arrest date from Portal
Browse files Browse the repository at this point in the history
  • Loading branch information
Rebecca Drabenstott committed Jul 23, 2024
1 parent d80208b commit c886744
Show file tree
Hide file tree
Showing 11 changed files with 137 additions and 22 deletions.
1 change: 0 additions & 1 deletion dear_petition/petition/etl/transform.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os
from typing import List
from django.db import transaction
from django.db.models import Q

from dear_petition.petition import models as pm

Expand Down
16 changes: 12 additions & 4 deletions dear_petition/portal/etl/extract.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from bs4 import BeautifulSoup

from .models import CaseSummary, PartyInfo, PortalRecord
from .parsers import case_summary, dispositions, case_info, party_info
from .models import CaseSummary, PartyInfo, CaseEvents, PortalRecord
from .parsers import case_summary, dispositions, case_info, case_events, party_info


def extract_portal_record(source):
Expand All @@ -11,6 +11,7 @@ def extract_portal_record(source):
case_summary=parse_case_summary(soup),
case_info=case_info.parse_case_information(soup),
party_info=parse_party_information(soup),
case_events=parse_case_events(soup),
dispositions=dispositions.parse_dispositions(soup),
)

Expand All @@ -28,6 +29,13 @@ def parse_party_information(soup):
"""Party Information section"""
return PartyInfo(
defendant_name=party_info.parse_defendant_name(soup),
defendant_race=party_info.parse_defendant_race(soup),
defendant_sex=party_info.parse_defendant_sex(soup)
defendant_race=party_info.parse_defendant_race(soup) or "",
defendant_sex=party_info.parse_defendant_sex(soup) or ""
)


def parse_case_events(soup):
    """Build the Case Events section from the parsed Portal page.

    Any falsy result from the process-served-date parser (for example an
    empty string) is normalized to None before it reaches the model.
    """
    served_date = case_events.parse_process_served_date(soup)
    return CaseEvents(process_served_date=served_date or None)
38 changes: 29 additions & 9 deletions dear_petition/portal/etl/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@
from dear_petition.petition import constants


def parse_date(v):
    """Parse ``MM/DD/YYYY`` date strings into ``datetime.date`` objects.

    Args:
        v: The raw value to convert. Strings are parsed; any other value
            (``None``, an existing date, ...) is returned unchanged.

    Returns:
        A ``datetime.date`` for string input, otherwise ``v`` itself.

    Raises:
        ValueError: If ``v`` is a string that is not a ``MM/DD/YYYY`` date.
    """
    if isinstance(v, str):
        # strptime rejects leading/trailing whitespace, so trim it first;
        # text scraped out of HTML is frequently padded.
        return dt.datetime.strptime(v.strip(), "%m/%d/%Y").date()
    return v


class CaseSummary(BaseModel):
case_number: str
county: str
Expand All @@ -23,9 +30,7 @@ class Charge(BaseModel):
@field_validator("offense_date", "filed_date", mode="before")
@classmethod
def parse_date(cls, v):
if isinstance(v, str):
return dt.datetime.strptime(v, "%m/%d/%Y")
return v
return parse_date(v);

def transform_severity(self):
"""Attempt to convert Portal's degree to CIPRS severity"""
Expand All @@ -46,9 +51,7 @@ class CaseInfo(BaseModel):
@field_validator("case_status_date", mode="before")
@classmethod
def parse_date(cls, v):
if isinstance(v, str):
return dt.datetime.strptime(v, "%m/%d/%Y")
return v
return parse_date(v);


class PartyInfo(BaseModel):
Expand All @@ -57,6 +60,15 @@ class PartyInfo(BaseModel):
defendant_sex: str


class CaseEvents(BaseModel):
    """Case Events section of a Portal record."""

    # Date the process was served, or None when it was not found.
    process_served_date: Union[dt.date, None]

    @field_validator("process_served_date", mode="before")
    @classmethod
    def parse_date(cls, v):
        """Convert a raw date string into a date before field validation."""
        # Class scope is not searched from inside a method body, so this
        # name resolves to the module-level parse_date helper, not to this
        # validator itself.
        return parse_date(v)


class Disposition(BaseModel):
event_date: Union[dt.date, None]
event: str
Expand All @@ -67,9 +79,7 @@ class Disposition(BaseModel):
@field_validator("event_date", mode="before")
@classmethod
def parse_date(cls, v):
if isinstance(v, str):
return dt.datetime.strptime(v, "%m/%d/%Y")
return v
return parse_date(v);

def is_dismissed(self) -> bool:
return self.criminal_disposition in constants.DISMISSED_DISPOSITION_METHODS
Expand All @@ -90,6 +100,7 @@ class PortalRecord(BaseModel):
case_summary: CaseSummary
case_info: CaseInfo
party_info: PartyInfo
case_events: CaseEvents
dispositions: List[Disposition]

def get_charge_by_number(self, charge_number: int):
Expand All @@ -99,5 +110,14 @@ def get_charge_by_number(self, charge_number: int):
return charge

def transform_offense_date(self) -> Union[str, None]:
    """Return the earliest charge offense date as an ISO 8601 string.

    Returns:
        The minimum ``offense_date`` across ``case_info.charges`` formatted
        as ``YYYY-MM-DD``, or None when there are no charges or none of
        them carries an offense date.
    """
    # Skip charges without a date so min() never compares None to a date.
    # NOTE(review): assumes offense_date may be missing on a charge — confirm
    # against the Charge model.
    offense_dates = [
        charge.offense_date
        for charge in self.case_info.charges or []
        if charge.offense_date
    ]
    if not offense_dates:
        return None

    return min(offense_dates).isoformat()

def transform_arrest_date(self) -> Union[str, None]:
    """Return the arrest date as an ISO 8601 string.

    The arrest date is taken from ``case_events.process_served_date``.

    Returns:
        The date formatted as ``YYYY-MM-DD``, or None when no process
        served date is available.
    """
    served_date = self.case_events.process_served_date
    if not served_date:
        return None

    return served_date.isoformat()
63 changes: 63 additions & 0 deletions dear_petition/portal/etl/parsers/case_events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from .utils import catch_parse_error


@catch_parse_error
def parse_process_served_date(soup):
    """
    Parse the process served date from the case events.

    Finds the div labelled "Legacy Process Served Date", walks up to the
    enclosing event row and returns the stripped text of that row's date
    column (e.g. "08/12/1988"). Returns None when the label is not present.

    Sample HTML (abridged):
    <div data-rem-class="roa-event-row" class="ng-scope">
      <div class="roa-inline roa-align-top roa-event-date-col ng-scope" style="min-width:11rem">
        <div ng-transclude="">
          <span class="ng-binding ng-scope">08/12/1988</span>
        </div>
      </div>
      <div data-rem-class="roa-event-content" class="roa-inline roa-align-top">
        <div>
          <div>
            <roa-documents></roa-documents>
            <div class="roa-data ng-isolate-scope" label="Legacy Process Served Date">
              <div class="roa-label roa-inline roa-align-top ng-binding ng-scope">Legacy Process Served Date</div>
              <div class="roa-value roa-inline roa-indent" ng-transclude=""></div>
            </div>
          </div>
        </div>
      </div>
    </div>
    """  # noqa

    label_div = soup.find("div", {"label": "Legacy Process Served Date"})
    if not label_div:
        return None

    # In the sample markup the label sits four levels below the event row,
    # so climb four parents to reach the row that also holds the date column.
    # NOTE(review): a missing ancestor or date element raises AttributeError
    # here; presumably catch_parse_error deals with that — confirm.
    event_row = (
        label_div.find_parent().find_parent().find_parent().find_parent()
    )

    date_column = event_row.find("div", class_="roa-event-date-col")
    return date_column.find("span", class_="ng-binding ng-scope").text.strip()
2 changes: 1 addition & 1 deletion dear_petition/portal/etl/parsers/case_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
def parse_case_number(soup):
div = soup.find("div", string=re.compile(r"\s?Case Number\s?"))
match = FILENO_REGEX.search(div.parent.text)
return match.group() if match else ""
return match.group() if match else None


@catch_parse_error
Expand Down
14 changes: 11 additions & 3 deletions dear_petition/portal/etl/parsers/party_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
@catch_parse_error
def parse_defendant_name(soup):
"""
Parse case type
Parse defendant name
Sample HTML:
Expand Down Expand Up @@ -41,7 +41,11 @@ def parse_defendant_race(soup):
White
</div>
""" # noqa
return soup.find('div', {'ng-if': '::party.Race'}).get_text(strip=True)
race_div = soup.find('div', {'ng-if': '::party.Race'})
if not race_div:
return None

return race_div.get_text(strip=True)


@catch_parse_error
Expand All @@ -54,4 +58,8 @@ def parse_defendant_sex(soup):
Female
</div>
""" # noqa
return soup.find('div', {'ng-if': '::party.Gender'}).get_text(strip=True)
sex_div = soup.find('div', {'ng-if': '::party.Gender'})
if not sex_div:
return None

return sex_div.get_text(strip=True)
4 changes: 3 additions & 1 deletion dear_petition/portal/etl/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ def transform_portal_record(source, location=""):
"""Transform eCourts Portal record to CIPRS-looking record."""
portal_record = extract_portal_record(source)
court = portal_record.case_summary.court
sex = portal_record.party_info.defendant_sex
return {
"General": {
"County": portal_record.case_summary.county,
Expand All @@ -16,11 +17,12 @@ def transform_portal_record(source, location=""):
"Case Information": {
"Case Status": portal_record.case_info.case_status,
"Offense Date": portal_record.transform_offense_date(),
"Arrest Date": portal_record.transform_arrest_date(),
},
"Defendant": {
"Name": portal_record.party_info.defendant_name,
"Race": portal_record.party_info.defendant_race,
"Sex": pc.SEX_MAP[portal_record.party_info.defendant_sex],
"Sex": pc.SEX_MAP[sex] if sex else "",
},
"District Court Offense Information": (
transform_offenses(portal_record) if court == "District" else []
Expand Down
4 changes: 2 additions & 2 deletions dear_petition/portal/tests/data/record.html
Original file line number Diff line number Diff line change
Expand Up @@ -3088,7 +3088,7 @@ <h1>
ng-class="::{'roa-text-strike': event.Event.IsDeleted}"
>
<span class="ng-binding ng-scope">
01/01/2001
01/03/2001
</span>
</div>
</div>
Expand Down Expand Up @@ -3174,7 +3174,7 @@ <h1>
ng-transclude=""
>
<span class="ng-binding ng-scope">
01/01/2001 12:00 AM
01/03/2001 12:00 AM
</span>
</div>
</div>
Expand Down
7 changes: 7 additions & 0 deletions dear_petition/portal/tests/parsers/test_case_events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from dear_petition.portal.etl.parsers import case_events


class TestCaseEventsFullRecord:
    """Checks for the case events parsers.

    ``soup`` is a fixture defined elsewhere — presumably the parsed sample
    record; verify against the test configuration.
    """

    def test_parse_process_served_date(self, soup):
        """The parser returns the raw MM/DD/YYYY text of the served date."""
        served_date = case_events.parse_process_served_date(soup)
        assert served_date == "01/03/2001"

4 changes: 4 additions & 0 deletions dear_petition/portal/tests/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
Charge,
PartyInfo,
Disposition,
CaseEvents,
)


Expand Down Expand Up @@ -36,6 +37,9 @@ def test_extract(sample_record):
defendant_race = "White",
defendant_sex="Female"
),
case_events=CaseEvents(
process_served_date=dt.date(2001, 1, 3)
),
dispositions=[
Disposition(
event_date=dt.date(2001, 12, 1),
Expand Down
6 changes: 5 additions & 1 deletion dear_petition/portal/tests/transform/test_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@

def test_transform_full_record(sample_record):
expected = {
"Case Information": {"Case Status": "Disposed", "Offense Date": "2001-01-01"},
"Case Information": {
"Case Status": "Disposed",
"Offense Date": "2001-01-01",
"Arrest Date": "2001-01-03"
},
"Defendant": {
"Name": "DOE, JANE EMMA",
"Race": "White",
Expand Down

0 comments on commit c886744

Please sign in to comment.