diff --git a/scrapers/ri/bills.py b/scrapers/ri/bills.py index 99b361521e..886eba1447 100644 --- a/scrapers/ri/bills.py +++ b/scrapers/ri/bills.py @@ -113,7 +113,7 @@ def digest_results_page(self, nodes): lines = [(n.text_content().strip(), n) for n in node] if "No Bills Met this Criteria" in [x[0] for x in lines]: self.info("No results. Skipping block") - # return [] + return [] for line in lines: line, node = line @@ -158,6 +158,7 @@ def get_subject_bill_dict(self, session): default_headers["ctl00$rilinContent$cbCategory"] = subjects[subject] default_headers["ctl00$rilinContent$cbYear"] = session + self.info(f"Fetching bills subject mapping for {subject}") blocks = self.parse_results_page( self.post(SEARCH_URL, data=default_headers).text ) @@ -220,6 +221,9 @@ def scrape_bills(self, chamber, session, subjects): for idex in idexes: blocks = "FOO" # Ugh. while len(blocks) > 0: + self.info( + f"Searching for bills in {chamber} in range {idex} to {idex + MAXQUERY}" + ) default_headers = get_default_headers(SEARCH_URL) default_headers[FROM] = idex default_headers[TO] = idex + MAXQUERY @@ -291,7 +295,12 @@ def scrape_bills(self, chamber, session, subjects): def scrape(self, chamber=None, session=None): chambers = [chamber] if chamber is not None else ["upper", "lower"] - subjects = self.get_subject_bill_dict(session) + # NOTE: disabled by showerst 2024-01-04 + # this is making 350 POST requests because there are almost no subjects + # assigned yet, but the 2023 subjects are still posted. + # possibly restore this later. + # subjects = self.get_subject_bill_dict(session) + subjects = {} for chamber in chambers: yield from self.scrape_bills(chamber, session, subjects)