Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

eCourts Portal import proof of concept #431

Merged
merged 34 commits into from
Jan 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
671c436
portal import proof of concept
copelco Sep 9, 2023
b5a86f7
Automatically embed username (#432)
robert-w-gries Sep 12, 2023
343ed34
run prettier
copelco Oct 1, 2023
f176a8e
iterate on parsing model; parse charges
copelco Oct 8, 2023
268bf4f
parse case type
copelco Oct 9, 2023
a7277e8
parse case status
copelco Oct 10, 2023
c2fb97e
refactor into parser modules
copelco Oct 10, 2023
b5b6388
remove unused loggers
copelco Oct 10, 2023
d654ca2
reconnect with transform_portal_record
copelco Oct 10, 2023
1b75985
fix comment
copelco Oct 10, 2023
dc82bf7
fix tests
copelco Oct 10, 2023
ede3c48
fix import
copelco Oct 18, 2023
770f5f9
start parsing dispositions
copelco Oct 18, 2023
6d8be85
transform offenses
copelco Oct 20, 2023
9519ed9
test parsers
copelco Oct 21, 2023
aa3f672
use localhost during development
copelco Dec 30, 2023
48a49ce
add env name to bookmarklet
copelco Dec 30, 2023
674a676
save source HTML in record data
copelco Dec 30, 2023
dbdbd60
run create_batch_petitions
copelco Dec 30, 2023
ccdb059
Merge remote-tracking branch 'origin/master' into portal-import
copelco Dec 30, 2023
17d12a3
run prettier
copelco Dec 30, 2023
d0fb35e
transform jurisdiction, offense date, offense action, and offense dis…
copelco Dec 31, 2023
1cc027b
remove README
copelco Jan 13, 2024
30ecee0
reorg parser tests
copelco Jan 14, 2024
73f0d56
migrate constants
copelco Jan 14, 2024
109fdf2
Merge branch 'master' into portal-import
copelco Jan 14, 2024
f0b6c2a
add severity constants
copelco Jan 14, 2024
8f8ff44
Merge branch 'portal-import' of github.com:deardurham/dear-petition i…
copelco Jan 14, 2024
b402407
add extract test
copelco Jan 14, 2024
0881bc7
test transform
copelco Jan 14, 2024
f9ea96f
Update dear_petition/portal/etl/parsers/case_info.py
copelco Jan 14, 2024
6ddd3e3
add success alert
copelco Jan 14, 2024
bcbffef
save page address to metadata
copelco Jan 14, 2024
b0c1516
use window.location.origin for bookmarklet
copelco Jan 14, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions config/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
name="password_reset_complete",
),
path("sendgrid/", include("dear_petition.sendgrid.urls")),
path("portal/", include("dear_petition.portal.urls")),
# React SPA:
path(r"", index, name="index"),
re_path(r"^(?:.*)/?$", index, name="index-others"),
Expand Down
8 changes: 8 additions & 0 deletions dear_petition/petition/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@
"No Probable Cause",
"Never To Be Served",
"Deferred Proceeding or Deferred Prosecution Dismissal",
# Portal:
"VD-District Dismissals w/o Leave by DA - No Plea Agreement",
"VD-Superior Dismissals w/o Leave by DA - No Plea Agreement",
)

DISP_METHOD_SUPERSEDING_INDICTMENT = "SUPERSEDING INDICTMENT OR PROCESS"
Expand Down Expand Up @@ -167,3 +170,8 @@
"number_of_charges",
"created",
)

# Severity labels that Portal charge degrees are normalized to.
SEVERITY_FELONY = "FELONY"
SEVERITY_MISDEMEANOR = "MISDEMEANOR"
# Portal "degree" codes grouped by the severity they map to (see
# Charge.transform_severity in dear_petition/portal/etl/models.py); a degree
# that appears in neither tuple is passed through unchanged.
CHARGED_DEGREE_FELONY = ("FH", "FNC")
CHARGED_DEGREE_MISDEMEANOR = ("MNC",)
11 changes: 8 additions & 3 deletions dear_petition/petition/etl/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ def import_ciprs_records(files, user, parser_mode, batch_label=""):


def create_batch_petitions(batch):
# Start with a fresh petition list
batch.petitions.all().delete()
# Dismissed
create_petitions_from_records(batch, DISMISSED)
# Not guilty
Expand All @@ -78,9 +80,13 @@ def create_petitions_from_records(batch, form_type):
jurisdiction=petition_type["jurisdiction"],
county=petition_type["county"],
)
sheriff_agency = pm.Contact.get_sherriff_office_by_county(petition_type["county"])
sheriff_agency = pm.Contact.get_sherriff_office_by_county(
petition_type["county"]
)
if sheriff_agency is not None:
logger.info(f"Detected {sheriff_agency.name} as {petition_type['county']} county's sherrif's office. Adding as default agency.")
logger.info(
f"Detected {sheriff_agency.name} as {petition_type['county']} county's sherrif's office. Adding as default agency."
)
petition.agencies.add(sheriff_agency)
link_offense_records(petition)
logger.info(
Expand Down Expand Up @@ -143,7 +149,6 @@ def assign_agencies_to_documents(petition):
first_iteration = True
i = 0
while True:

current_document_agencies = agencies[
i : (i + 3)
] # 3 boxes for agencies per document
Expand Down
Empty file.
6 changes: 6 additions & 0 deletions dear_petition/portal/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class PortalConfig(AppConfig):
    """Django application configuration for the ``dear_petition.portal`` app."""

    # Dotted Python path that identifies this application to Django.
    name = "dear_petition.portal"
    # Primary-key field type for models that do not declare one explicitly.
    default_auto_field = "django.db.models.BigAutoField"
Empty file.
29 changes: 29 additions & 0 deletions dear_petition/portal/etl/extract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from bs4 import BeautifulSoup

from .models import CaseSummary, PartyInfo, PortalRecord
from .parsers import case_summary, dispositions, case_info, party_info


def extract_portal_record(source):
    """Build a ``PortalRecord`` from the HTML source of an eCourts Portal page.

    The markup is parsed once with BeautifulSoup's ``html.parser`` and the
    resulting tree is handed to each section parser in turn.
    """
    document = BeautifulSoup(source, features="html.parser")
    # Dict literals evaluate in order, so the sections are parsed in the same
    # sequence as the model declares them.
    sections = {
        "case_summary": parse_case_summary(document),
        "case_info": case_info.parse_case_information(document),
        "party_info": parse_party_information(document),
        "dispositions": dispositions.parse_dispositions(document),
    }
    return PortalRecord(**sections)


def parse_case_summary(soup):
    """Parse the Case Summary section into a ``CaseSummary``.

    Any value the underlying parser returns as falsy (e.g. ``None``) is
    stored as an empty string.
    """
    case_number = case_summary.parse_case_number(soup)
    county = case_summary.parse_county(soup)
    court = case_summary.parse_court(soup)
    return CaseSummary(
        case_number=case_number or "",
        county=county or "",
        court=court or "",
    )


def parse_party_information(soup):
    """Parse the Party Information section into a ``PartyInfo``."""
    defendant_name = party_info.parse_defendant_name(soup)
    return PartyInfo(defendant_name=defendant_name)
21 changes: 21 additions & 0 deletions dear_petition/portal/etl/load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import logging

from dear_petition.petition.models import Batch, CIPRSRecord
from dear_petition.petition.etl.load import create_batch_petitions

from .transform import transform_portal_record

__all__ = ("import_portal_record",)

logger = logging.getLogger(__name__)


def import_portal_record(user, source: str, location: str):
    """Import one eCourts Portal page into the petition models.

    ``source`` and ``location`` are passed to ``transform_portal_record``; the
    resulting data is saved as a ``CIPRSRecord`` in the user's batch labelled
    with the defendant's name (the batch is created when it does not exist),
    and ``create_batch_petitions`` is then run for that batch.
    """
    logger.info("Importing Portal record")
    record_data = transform_portal_record(source, location)
    defendant_name = record_data["Defendant"]["Name"]
    # get_or_create returns (object, created); only the batch itself is needed.
    batch = Batch.objects.get_or_create(user=user, label=defendant_name)[0]
    portal_record = CIPRSRecord(batch=batch, data=record_data)
    portal_record.refresh_record_from_data()
    portal_record.save()
    create_batch_petitions(batch)
101 changes: 101 additions & 0 deletions dear_petition/portal/etl/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import datetime as dt
from typing import List, Union

from pydantic import BaseModel, field_validator

from dear_petition.petition import constants


class CaseSummary(BaseModel):
    """Identifying details taken from a Portal record's Case Summary section."""

    # All three values are required strings.
    case_number: str
    county: str
    court: str


class Charge(BaseModel):
number: Union[int, None]
offense: str
statute: str
degree: str
offense_date: Union[dt.date, None]
filed_date: Union[dt.date, None]

@field_validator("offense_date", "filed_date", mode="before")
@classmethod
def parse_date(cls, v):
if isinstance(v, str):
return dt.datetime.strptime(v, "%m/%d/%Y")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if the date string is in a different format? That's a common issue with CIPRS PDFs.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently it'll throw a ValidationError, which hopefully will get logged in Sentry.

>>> from dear_petition.portal.etl.parsers import case_info 
>>> case_info.Charge(
...     number=1,
...     offense="BREAK OR ENTER A MOTOR VEHICLE",
...     statute="14-56",
...     degree="FNC",
...     offense_date="January 1, 2001",
...     filed_date="01/09/2001",
... )
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/Users/copelco/projects/dear-petition/.direnv/python-3.8/lib/python3.8/site-packages/pydantic/main.py", line 164, in __init__
    __pydantic_self__.__pydantic_validator__.validate_python(data, self_instance=__pydantic_self__)
pydantic_core._pydantic_core.ValidationError: 1 validation error for Charge
offense_date
  Value error, time data 'January 1, 2001' does not match format '%m/%d/%Y' [type=value_error, input_value='January 1, 2001', input_type=str]
    For further information visit https://errors.pydantic.dev/2.4/v/value_error

return v

def transform_severity(self):
    """Map the Portal degree code onto a CIPRS severity label.

    Returns the felony or misdemeanor severity constant when the degree is a
    known code; otherwise the degree is returned unchanged.
    """
    if self.degree in constants.CHARGED_DEGREE_FELONY:
        return constants.SEVERITY_FELONY
    if self.degree in constants.CHARGED_DEGREE_MISDEMEANOR:
        return constants.SEVERITY_MISDEMEANOR
    return self.degree


class CaseInfo(BaseModel):
    """Case information for a Portal record: type, status, and charges."""

    case_type: str
    case_status: str
    case_status_date: Union[dt.date, None]
    charges: List[Charge]

    @field_validator("case_status_date", mode="before")
    @classmethod
    def parse_date(cls, v):
        """Convert an ``MM/DD/YYYY`` string into a ``date``.

        Non-string input (``None`` or an existing date) is passed through
        for pydantic to validate. A string in any other format raises
        ``ValueError``, which pydantic reports as a ``ValidationError``.
        """
        if isinstance(v, str):
            # The field is a date, so hand pydantic a date rather than a
            # midnight datetime that it would have to coerce.
            return dt.datetime.strptime(v, "%m/%d/%Y").date()
        return v


class PartyInfo(BaseModel):
    """Party information for a Portal record."""

    # Required string; a missing (None) name fails validation.
    defendant_name: str


class Disposition(BaseModel):
    """A single disposition event recorded against one charge."""

    event_date: Union[dt.date, None]
    event: str
    charge_number: int
    charge_offense: str
    criminal_disposition: str

    @field_validator("event_date", mode="before")
    @classmethod
    def parse_date(cls, v):
        """Convert an ``MM/DD/YYYY`` string into a ``date``.

        Non-string input (``None`` or an existing date) is passed through
        for pydantic to validate. A string in any other format raises
        ``ValueError``, which pydantic reports as a ``ValidationError``.
        """
        if isinstance(v, str):
            # The field is a date, so hand pydantic a date rather than a
            # midnight datetime that it would have to coerce.
            return dt.datetime.strptime(v, "%m/%d/%Y").date()
        return v

    def is_dismissed(self) -> bool:
        """Return True when the disposition is a known dismissal method."""
        return self.criminal_disposition in constants.DISMISSED_DISPOSITION_METHODS

    def transform_action(self) -> str:
        """Return the offense action: ``CHARGED`` if dismissed, else the event."""
        return constants.CHARGED if self.is_dismissed() else self.event

    def transform_disposition_method(self) -> str:
        """Return the disposition method to store for this disposition.

        Dismissals are normalized to a single constant; anything else keeps
        the Portal's own disposition text.
        """
        if self.is_dismissed():
            # NOTE(review): every dismissal maps to the district-court
            # constant -- confirm this is intended for superior-court cases.
            return constants.DISTRICT_COURT_WITHOUT_DA_LEAVE
        return self.criminal_disposition


class PortalRecord(BaseModel):
    """Complete parsed eCourts Portal record, one attribute per page section."""

    case_summary: CaseSummary
    case_info: CaseInfo
    party_info: PartyInfo
    dispositions: List[Disposition]

    def get_charge_by_number(self, charge_number: int) -> Union[Charge, None]:
        """Return the charge whose number equals ``charge_number``.

        Returns ``None`` when no charge in ``case_info.charges`` matches.
        """
        for charge in self.case_info.charges:
            if charge.number == charge_number:
                return charge
        return None

    def transform_offense_date(self) -> Union[str, None]:
        """Return the earliest known offense date as an ISO-8601 string.

        Charges without an offense date are ignored. Returns ``None`` when
        no charge carries a date (including when there are no charges).
        """
        # Charge.offense_date may be None; comparing None with a date raises
        # TypeError, and min() of an empty sequence raises ValueError.
        earliest = min(
            (
                charge.offense_date
                for charge in self.case_info.charges
                if charge.offense_date is not None
            ),
            default=None,
        )
        if earliest is None:
            return None
        return earliest.isoformat()
Empty file.
Loading
Loading