diff --git a/dear_petition/portal/etl/parsers/case_info.py b/dear_petition/portal/etl/parsers/case_info.py index 62e5099a..be9d9530 100644 --- a/dear_petition/portal/etl/parsers/case_info.py +++ b/dear_petition/portal/etl/parsers/case_info.py @@ -119,6 +119,7 @@ def parse_charge_offense(tr): return span.text +@catch_parse_error def parse_statute(tr): """ Parse charge statute diff --git a/dear_petition/portal/etl/parsers/utils.py b/dear_petition/portal/etl/parsers/utils.py index 156c8ac2..0be0278d 100644 --- a/dear_petition/portal/etl/parsers/utils.py +++ b/dear_petition/portal/etl/parsers/utils.py @@ -9,10 +9,10 @@ def catch_parse_error(func): """Decorator to catch parsing errors so parsing may continue""" @wraps(func) - def wrapper(*args, **kwargs): + def wrapper(soup): try: - return func(*args, **kwargs) + return func(soup) except Exception: - logger.exception(f"Exception occurred in {func}") + logger.exception(f"Exception occurred parsing: {soup}") return wrapper diff --git a/dear_petition/portal/tests/parsers/conftest.py b/dear_petition/portal/tests/parsers/conftest.py new file mode 100644 index 00000000..4aca58d2 --- /dev/null +++ b/dear_petition/portal/tests/parsers/conftest.py @@ -0,0 +1,15 @@ +import pathlib + +from bs4 import BeautifulSoup +import pytest + + +@pytest.fixture(scope="module") +def sample_record(): + path = pathlib.Path(__file__).parent.parent / "data" / "record.html" + return path.read_text() + + +@pytest.fixture(scope="module") +def soup(sample_record): + return BeautifulSoup(sample_record, features="html.parser") diff --git a/dear_petition/portal/tests/parsers/test_case_info.py b/dear_petition/portal/tests/parsers/test_case_info.py new file mode 100644 index 00000000..50ac7b27 --- /dev/null +++ b/dear_petition/portal/tests/parsers/test_case_info.py @@ -0,0 +1,63 @@ +import pytest + +from bs4 import BeautifulSoup + +from dear_petition.portal.etl.parsers import ( + case_info, +) + + +class TestCaseInfoFullRecord: + def test_parse_case_type(self, soup): + assert case_info.parse_case_type(soup) == "Criminal" + + def test_parse_case_status_date(self, soup): + assert case_info.parse_case_status_date(soup) == "12/01/2001" + + def test_parse_case_status(self, soup): + assert case_info.parse_case_status(soup) == "Disposed" + + def test_parse_charge_number(self, soup): + tr = soup.select_one("div[ng-if*=ShowOffenses] tr.hide-sm") + assert case_info.parse_charge_number(tr) == "01" + + def test_parse_charge_offense(self, soup): + tr = soup.select_one(case_info.SELECT_OFFENSES) + assert case_info.parse_charge_offense(tr) == "EXTRADITION/FUGITIVE OTH STATE" + + def test_parse_statute(self, soup): + tr = soup.select_one(case_info.SELECT_OFFENSES) + assert case_info.parse_statute(tr) == "15A-727;733;734" + + def test_parse_charge_degree(self, soup): + tr = soup.select_one(case_info.SELECT_OFFENSES) + assert case_info.parse_charge_degree(tr) == "FNC" + + def test_parse_charge_offense_date(self, soup): + tr = soup.select_one(case_info.SELECT_OFFENSES) + assert case_info.parse_charge_offense_date(tr) == "01/01/2001" + + def test_parse_charge_file_date(self, soup): + tr = soup.select_one(case_info.SELECT_OFFENSES) + assert case_info.parse_charge_filed_date(tr) == "01/09/2001" + + +@pytest.mark.parametrize( + "parser", + [ + case_info.parse_case_type, + case_info.parse_case_status_date, + case_info.parse_case_status, + case_info.parse_charge_number, + case_info.parse_charge_offense, + case_info.parse_statute, + case_info.parse_charge_degree, + case_info.parse_charge_offense_date, + case_info.parse_charge_filed_date, + ], +) +def test_catch_parse_error(caplog, parser): + """Ensure HTML parse exceptions are captured and logged""" + soup = BeautifulSoup("
", features="html.parser") + parser(soup) + assert str(parser.__name__) in caplog.text diff --git a/dear_petition/portal/tests/parsers/test_case_summary.py b/dear_petition/portal/tests/parsers/test_case_summary.py new file mode 100644 index 00000000..cc66233c --- /dev/null +++ b/dear_petition/portal/tests/parsers/test_case_summary.py @@ -0,0 +1,12 @@ +from dear_petition.portal.etl.parsers import case_summary + + +class TestCaseSummaryFullRecord: + def test_parse_filno(self, soup): + assert case_summary.parse_case_number(soup) == "01CR012345-678" + + def test_parse_county(self, soup): + assert case_summary.parse_county(soup) == "Wake" + + def test_district_court(self, soup): + assert case_summary.parse_court(soup) == "District" diff --git a/dear_petition/portal/tests/parsers/test_dispositions.py b/dear_petition/portal/tests/parsers/test_dispositions.py new file mode 100644 index 00000000..54903edf --- /dev/null +++ b/dear_petition/portal/tests/parsers/test_dispositions.py @@ -0,0 +1,28 @@ +from dear_petition.portal.etl.parsers import dispositions + + +class TestDispositionsFullRecord: + def test_parse_event_date(self, soup): + div = soup.select_one(dispositions.SELECT_DISPOSITIONS) + assert dispositions.parse_event_date(div) == "12/01/2001" + + def test_parse_event(self, soup): + div = soup.select_one(dispositions.SELECT_DISPOSITIONS) + assert dispositions.parse_event(div) == "Disposition" + + def test_parse_charge_number(self, soup): + div = soup.select_one(dispositions.SELECT_DISPOSITIONS) + assert dispositions.parse_charge_number(div) == "01" + + def test_parse_charge_offense(self, soup): + div = soup.select_one(dispositions.SELECT_DISPOSITIONS) + assert ( + dispositions.parse_charge_offense(div) == "EXTRADITION/FUGITIVE OTH STATE" + ) + + def test_parse_criminal_disposition(self, soup): + div = soup.select_one(dispositions.SELECT_DISPOSITIONS) + assert ( + dispositions.parse_criminal_disposition(div) + == "District Dismissed by the Court - No Plea Agreement" + ) diff --git a/dear_petition/portal/tests/parsers/test_party_info.py b/dear_petition/portal/tests/parsers/test_party_info.py new file mode 100644 index 00000000..9b610cbf --- /dev/null +++ b/dear_petition/portal/tests/parsers/test_party_info.py @@ -0,0 +1,6 @@ +from dear_petition.portal.etl.parsers import party_info + + +class TestPartyInfoFullRecord: + def test_parse_defendant_name(self, soup): + assert party_info.parse_defendant_name(soup) == "DOE, JANE EMMA" diff --git a/dear_petition/portal/tests/test_parse.py b/dear_petition/portal/tests/test_parse.py deleted file mode 100644 index 0b60b63d..00000000 --- a/dear_petition/portal/tests/test_parse.py +++ /dev/null @@ -1,100 +0,0 @@ -import pathlib - -from bs4 import BeautifulSoup -import pytest - -from dear_petition.portal.etl.parsers import ( - case_info, - case_summary, - party_info, - dispositions, -) - - -@pytest.fixture(scope="module") -def sample_record(): - path = pathlib.Path(__file__).parent / "data" / "record.html" - return path.read_text() - - -@pytest.fixture(scope="module") -def soup(sample_record): - return BeautifulSoup(sample_record, features="html.parser") - - -class TestCaseInfo: - def test_parse_case_type(self, soup): - assert case_info.parse_case_type(soup) == "Criminal" - - def test_parse_case_status_date(self, soup): - assert case_info.parse_case_status_date(soup) == "12/01/2001" - - def test_parse_case_status(self, soup): - assert case_info.parse_case_status(soup) == "Disposed" - - def test_parse_charge_number(self, soup): - tr = soup.select_one("div[ng-if*=ShowOffenses] tr.hide-sm") - assert case_info.parse_charge_number(tr) == "01" - - def test_parse_charge_offense(self, soup): - tr = soup.select_one(case_info.SELECT_OFFENSES) - assert case_info.parse_charge_offense(tr) == "EXTRADITION/FUGITIVE OTH STATE" - - def test_parse_statute(self, soup): - tr = soup.select_one(case_info.SELECT_OFFENSES) - assert case_info.parse_statute(tr) == "15A-727;733;734" - - def test_parse_charge_degree(self, soup): - tr = soup.select_one(case_info.SELECT_OFFENSES) - assert case_info.parse_charge_degree(tr) == "FNC" - - def test_parse_charge_offense_date(self, soup): - tr = soup.select_one(case_info.SELECT_OFFENSES) - assert case_info.parse_charge_offense_date(tr) == "01/01/2001" - - def test_parse_charge_file_date(self, soup): - tr = soup.select_one(case_info.SELECT_OFFENSES) - assert case_info.parse_charge_filed_date(tr) == "01/09/2001" - - -class TestCaseSummary: - def test_parse_filno(self, soup): - assert case_summary.parse_case_number(soup) == "01CR012345-678" - - def test_parse_county(self, soup): - assert case_summary.parse_county(soup) == "Wake" - - def test_district_court(self, soup): - assert case_summary.parse_court(soup) == "District" - - -class TestDispositions: - def test_parse_event_date(self, soup): - div = soup.select_one(dispositions.SELECT_DISPOSITIONS) - assert dispositions.parse_event_date(div) == "12/01/2001" - - def test_parse_event(self, soup): - div = soup.select_one(dispositions.SELECT_DISPOSITIONS) - assert dispositions.parse_event(div) == "Disposition" - - def test_parse_charge_number(self, soup): - div = soup.select_one(dispositions.SELECT_DISPOSITIONS) - assert dispositions.parse_charge_number(div) == "01" - - def test_parse_charge_offense(self, soup): - div = soup.select_one(dispositions.SELECT_DISPOSITIONS) - assert ( - dispositions.parse_charge_offense(div) == "EXTRADITION/FUGITIVE OTH STATE" - ) - - def test_parse_criminal_disposition(self, soup): - div = soup.select_one(dispositions.SELECT_DISPOSITIONS) - assert ( - dispositions.parse_criminal_disposition(div) - == "District Dismissed by the Court - No Plea Agreement" - ) - - -class TestPartyInfo: - def test_parse_defendant_name(self, soup): - assert party_info.parse_defendant_name(soup) == "DOE, JANE EMMA" diff --git a/dear_petition/portal/tests/test_transform.py b/dear_petition/portal/tests/test_transform.py new file mode 100644 index 00000000..34585ef3 --- /dev/null +++ b/dear_petition/portal/tests/test_transform.py @@ -0,0 +1,23 @@ +import datetime as dt + +from dear_petition.portal.etl.models import Disposition + + +class TestCriminalDisposition: + def test_is_dismissed(self): + assert Disposition( + event_date=dt.date(2000, 1, 1), + event="Disposition", + charge_number=1, + charge_offense="BREAK OR ENTER A MOTOR VEHICLE", + criminal_disposition="VD-District Dismissals w/o Leave by DA - No Plea Agreement", + ).is_dismissed() + + def test_not_is_dismissed(self): + assert not Disposition( + event_date=dt.date(2000, 1, 1), + event="Disposition", + charge_number=1, + charge_offense="BREAK OR ENTER A MOTOR VEHICLE", + criminal_disposition="District Guilty - Judge", + ).is_dismissed()