Skip to content

Commit

Permalink
RI: Better logging, disable subjects for now
Browse files Browse the repository at this point in the history
  • Loading branch information
showerst committed Jan 4, 2024
1 parent c088259 commit 8f0ead4
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions scrapers/ri/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def digest_results_page(self, nodes):
lines = [(n.text_content().strip(), n) for n in node]
if "No Bills Met this Criteria" in [x[0] for x in lines]:
self.info("No results. Skipping block")
# return []
return []

for line in lines:
line, node = line
Expand Down Expand Up @@ -158,6 +158,7 @@ def get_subject_bill_dict(self, session):
default_headers["ctl00$rilinContent$cbCategory"] = subjects[subject]
default_headers["ctl00$rilinContent$cbYear"] = session

self.info(f"Fetching bills subject mapping for {subject}")
blocks = self.parse_results_page(
self.post(SEARCH_URL, data=default_headers).text
)
Expand Down Expand Up @@ -220,6 +221,9 @@ def scrape_bills(self, chamber, session, subjects):
for idex in idexes:
blocks = "FOO" # Ugh.
while len(blocks) > 0:
self.info(
f"Searching for bills in {chamber} in range {idex} to {idex + MAXQUERY}"
)
default_headers = get_default_headers(SEARCH_URL)
default_headers[FROM] = idex
default_headers[TO] = idex + MAXQUERY
Expand Down Expand Up @@ -291,7 +295,12 @@ def scrape_bills(self, chamber, session, subjects):
def scrape(self, chamber=None, session=None):
    """Yield everything ``scrape_bills`` produces for each requested chamber.

    When ``chamber`` is None, both "upper" and "lower" are scraped, in that
    order; otherwise only the given chamber is scraped. ``session`` is
    passed through to ``scrape_bills`` unchanged.
    """
    # NOTE: subject lookup disabled by showerst 2024-01-04.
    # Calling self.get_subject_bill_dict(session) was making 350 POST
    # requests because there are almost no subjects assigned yet, but the
    # 2023 subjects are still posted. Possibly restore that call later;
    # until then an empty mapping is handed to scrape_bills.
    subjects = {}

    if chamber is None:
        targets = ["upper", "lower"]
    else:
        targets = [chamber]

    for target in targets:
        yield from self.scrape_bills(target, session, subjects)
Expand Down

0 comments on commit 8f0ead4

Please sign in to comment.