From 76f65ba843fd6c238401babf7abb08b916f48890 Mon Sep 17 00:00:00 2001
From: Max Humber
Date: Tue, 21 Apr 2020 09:08:37 -0400
Subject: [PATCH] FIX: find mode=first returns None

---
 CHANGELOG.md | 4 ++++
 README.md | 8 ++++----
 gazpacho/soup.py | 2 ++
 setup.py | 2 +-
 tests/test_get.py | 4 ++--
 tests/test_soup.py | 20 +++++++++++++++++++-
 6 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 262a0b0..3d4553d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,10 @@ pip install -U gazpacho

 ---

+#### 0.9.2 (2020-04-21)
+
+- Fixed `find(..., mode='first')` to return `None` and not an `IndexError` (thanks, [psyonara](https://github.com/maxhumber/gazpacho/issues/14)!)
+
 #### 0.9.1 (2020-02-16)

 - Fixed `UnicodeEncodeError` lurking beneath `get` (thanks for the "Issue" [mlehotay](https://github.com/mlehotay)!)
diff --git a/README.md b/README.md
index 6046b39..7130a24 100644
--- a/README.md
+++ b/README.md
@@ -133,8 +133,8 @@ print(df[['PLAYER', 'TEAM', 'SALARY', 'AGE']].head(3))

 # PLAYER TEAM SALARY AGE
 # 0 1. Mitchell Marner TOR $16,000,000 22
-# 1 2. Auston Matthews TOR $15,900,000 21
-# 2 3. John Tavares TOR $15,900,000 28
+# 1 2. John Tavares TOR $15,900,000 28
+# 2 3. Auston Matthews TOR $15,900,000 21
 ```

 Powered by gazpacho:
@@ -151,8 +151,8 @@ print(df[['PLAYER', 'TEAM', 'SALARY', 'AGE']].head(3))

 # PLAYER TEAM SALARY AGE
 # 0 1. Mitchell Marner TOR $16,000,000 22
-# 1 2. Auston Matthews TOR $15,900,000 21
-# 2 3. John Tavares TOR $15,900,000 28
+# 1 2. John Tavares TOR $15,900,000 28
+# 2 3. Auston Matthews TOR $15,900,000 21
 ```


diff --git a/gazpacho/soup.py b/gazpacho/soup.py
index 1f3ac7f..4eb1cf4 100644
--- a/gazpacho/soup.py
+++ b/gazpacho/soup.py
@@ -175,6 +175,8 @@ def find(self, tag, attrs=None, mode="auto", strict=False):
         self.group = 0
         self.groups = []
         self.feed(self.html)
+        if mode in ["auto", "first"] and not self.groups:
+            return None
         if mode == "all":
             return self.groups
         if mode == "first":
diff --git a/setup.py b/setup.py
index 253bf55..48b94db 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@

 setup(
     name="gazpacho",
-    version="0.9.1",
+    version="0.9.2",
     description="gazpacho is a web scraping library",
     long_description=long_description,
     long_description_content_type="text/markdown",
diff --git a/tests/test_get.py b/tests/test_get.py
index 6dbbb60..00113bc 100644
--- a/tests/test_get.py
+++ b/tests/test_get.py
@@ -27,6 +27,6 @@ def test_get_params():


 def test_weird_characters():
-    url = 'https://httpbin.org/anything/drãke'
+    url = "https://httpbin.org/anything/drãke"
     content = get(url)
-    assert url == json.loads(content)['url']
+    assert url == json.loads(content)["url"]
diff --git a/tests/test_soup.py b/tests/test_soup.py
index d09c814..d90c6ef 100644
--- a/tests/test_soup.py
+++ b/tests/test_soup.py
@@ -113,7 +113,7 @@ def test_find_nested_empty_tag(fake_html_3):
 def test_find_mutliple_imgs(fake_html_3):
     soup = Soup(fake_html_3)
     result = soup.find("img")
-    assert result[1].attrs['src'] == 'bye.jpg'
+    assert result[1].attrs["src"] == "bye.jpg"


 def test_remove_tags(fake_html_4):
@@ -131,3 +131,21 @@ def test_remove_tags_no_strip(fake_html_4):
         result
         == "\n \n I like soup and I really like cold soup\n I guess hot soup is okay too\n \n "
     )
+
+
+def test_find_no_match_first(fake_html_1):
+    soup = Soup(fake_html_1)
+    result = soup.find("a", mode="first")
+    assert result == None
+
+
+def test_find_no_match_all(fake_html_1):
+    soup = Soup(fake_html_1)
+    result = soup.find("a", mode="all")
+    assert result == []
+
+
+def test_find_no_match_auto(fake_html_1):
+    soup = Soup(fake_html_1)
+    result = soup.find("a", mode="auto")
+    assert result == None