From 116a9aaf61afd2b10bda62a34e09cac3e8f182ff Mon Sep 17 00:00:00 2001 From: "Bora M. Alper" Date: Fri, 12 Jun 2020 09:39:45 +0300 Subject: [PATCH] initial commit --- .gitignore | 5 +++ CONTRIBUTING.md | 14 +++++++ LICENSE.txt | 13 +++++++ README.md | 11 ++++++ nm_iconfinder/__init__.py | 1 + nm_iconfinder/nm_iconfinder.py | 69 ++++++++++++++++++++++++++++++++++ requirements-dev.txt | 1 + requirements.txt | 4 ++ setup.cfg | 2 + setup.py | 27 +++++++++++++ 10 files changed, 147 insertions(+) create mode 100644 .gitignore create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE.txt create mode 100644 README.md create mode 100644 nm_iconfinder/__init__.py create mode 100644 nm_iconfinder/nm_iconfinder.py create mode 100644 requirements-dev.txt create mode 100644 requirements.txt create mode 100644 setup.cfg create mode 100644 setup.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9207d98 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +/venv/ +/dist/ +/.idea/ +/MANIFEST +__pycache__ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..77a1d00 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,14 @@ +# Contributing + +## Uploading to PYPI +1. Create tar: + + ```bash + python setup.py sdist + ``` + +2. Upload: + + ```bash + twine upload dist/* + ``` diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..1228468 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,13 @@ +Copyright (c) 2020 newsmail.today + +Permission to use, copy, modify, and distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..99053cc --- /dev/null +++ b/README.md @@ -0,0 +1,11 @@ +# nm_iconfinder + +Find icons of a website given a URL + +## Usage +```python +from nm_iconfinder import icons + +icons("https://blog.newsmail.today/") +``` + diff --git a/nm_iconfinder/__init__.py b/nm_iconfinder/__init__.py new file mode 100644 index 0000000..83cee48 --- /dev/null +++ b/nm_iconfinder/__init__.py @@ -0,0 +1 @@ +from nm_iconfinder.nm_iconfinder import icons diff --git a/nm_iconfinder/nm_iconfinder.py b/nm_iconfinder/nm_iconfinder.py new file mode 100644 index 0000000..25011cb --- /dev/null +++ b/nm_iconfinder/nm_iconfinder.py @@ -0,0 +1,69 @@ +from typing import List, Optional + +from base64 import b64encode +from urllib.parse import urljoin +from io import BytesIO + +from bs4 import BeautifulSoup +from PIL import Image +import requests + +TIMEOUT = 3 # seconds + + +class Icon: + """ + All icons are assumed to be square. 
+ """ + + def __init__(self, url: str, size: int, mimetype: str, data: bytes): + self.url = url + self.size = size + self.mimetype = mimetype + self.data = data + self.data_uri = "data:%s;base64,%s" % (mimetype, b64encode(data).decode("ascii")) + + def __repr__(self): + if "icon" in self.mimetype: + format = "ico" + else: + format = self.mimetype.split("/", 1)[1] + + return "Icon {0} {1}x{1}".format(format, self.size) + + @classmethod + def from_url(cls, url: str) -> Optional["Icon"]: + try: + res = requests.get(url, timeout=TIMEOUT) + res.raise_for_status() + except requests.exceptions.RequestException: + return None + + with BytesIO(res.content) as bio: + img = Image.open(bio) + width, height = img.size + # Ignore non-square Icons + if width != height: + return None + + mimetype = res.headers["Content-Type"].split(";", 1)[0].strip() + return cls(url, width, mimetype, res.content) + + +def icons(url: str) -> List[Icon]: + try: + response = requests.get(url, timeout=TIMEOUT) + response.raise_for_status() + except requests.exceptions.RequestException: + return [] + + soup = BeautifulSoup(response.text, features="lxml") + links = soup.find_all("link", attrs={"rel": "shortcut icon", "href": True}) \ + + soup.find_all("link", attrs={"rel": "icon", "href": True}) \ + + soup.find_all("link", attrs={"rel": "apple-touch-icon-precomposed", "href": True}) \ + + soup.find_all("link", attrs={"rel": "apple-touch-icon", "href": True}) \ + + [{"href": "/favicon.ico"}] + hrefs = set(urljoin(url, link["href"]) for link in links) + + icons_ = [Icon.from_url(urljoin(url, href)) for href in hrefs] # type: List[Optional[Icon]] + return sorted(filter(lambda i: i is not None, icons_), key=lambda i: i.size, reverse=True) diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..af996cf --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1 @@ +twine diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..00d259a --- /dev/null 
+++ b/requirements.txt @@ -0,0 +1,4 @@ +beautifulsoup4 +lxml +Pillow +requests diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..b88034e --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[metadata] +description-file = README.md diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..821567a --- /dev/null +++ b/setup.py @@ -0,0 +1,27 @@ +from distutils.core import setup + +setup( + name='nm_iconfinder', + packages=['nm_iconfinder'], + version='0.1', + license='ISC', + description='Find icons of a website given a URL', + author='newsmail.today', + author_email='us@newsmail.today', + url='https://github.com/newsmail-today/iconfinder', + keywords=['icon', 'favicon', 'newsmail'], + install_requires=[ + "beautifulsoup4", + "lxml", + "Pillow", + "requests", + ], + classifiers=[ + # Choose either "3 - Alpha", "4 - Beta" or "5 - Production/Stable" as the current state of your package + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: ISC License (ISCL)', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', + ], +)