From 48651a84e0b469fc71e97258fc048087fa64c3ce Mon Sep 17 00:00:00 2001 From: Matthew Wildoer Date: Mon, 10 Jun 2024 16:04:27 -0700 Subject: [PATCH 01/11] Consolidate parsers --- jlcpcb_scraper/parsers.py | 87 +++++++++++++++++++++++++++ jlcpcb_scraper/parsers/capacitance.py | 63 ------------------- jlcpcb_scraper/parsers/current.py | 74 ----------------------- jlcpcb_scraper/parsers/dielectric.py | 15 ----- jlcpcb_scraper/parsers/inductance.py | 71 ---------------------- jlcpcb_scraper/parsers/resistance.py | 66 -------------------- jlcpcb_scraper/parsers/voltage.py | 73 ---------------------- 7 files changed, 87 insertions(+), 362 deletions(-) create mode 100644 jlcpcb_scraper/parsers.py delete mode 100644 jlcpcb_scraper/parsers/capacitance.py delete mode 100644 jlcpcb_scraper/parsers/current.py delete mode 100644 jlcpcb_scraper/parsers/dielectric.py delete mode 100644 jlcpcb_scraper/parsers/inductance.py delete mode 100644 jlcpcb_scraper/parsers/resistance.py delete mode 100644 jlcpcb_scraper/parsers/voltage.py diff --git a/jlcpcb_scraper/parsers.py b/jlcpcb_scraper/parsers.py new file mode 100644 index 0000000..b078b6b --- /dev/null +++ b/jlcpcb_scraper/parsers.py @@ -0,0 +1,87 @@ +from functools import partial + +_multipliers = { + "p": 1.0e-12, + "n": 1.0e-9, + "u": 1.0e-6, + "µ": 1.0e-6, # duplicate ways to write micro + "m": 1.0e-3, + # skip x1 + "k": 1000.0, + "M": 1000_000.0, +} + +def _parse(unit: str, description: str) -> float | None: + """Parse the capacitance from a component description""" + if description is None: + return None + + for part in description.split(' '): + if len(part) < 2: + continue + + if part[-1] != unit: + continue + + if str.isdigit(part[-2]): + multiplier = 1 + numeric_part = part[:-1] + else: + if part[-2] not in _multipliers: + continue + multiplier = _multipliers[part[-2]] + numeric_part = part[:-2] + + try: + return float(numeric_part) * multiplier + except ValueError: + pass + +capacitance = partial(_parse, "F") +resistance 
= partial(_parse, "Ω") +inductance = partial(_parse, "H") +power = partial(_parse, "W") +current = partial(_parse, "A") +voltage = partial(_parse, "V") + +def dielectric(description: str) -> str | None: + """Parse the dielectric from a component description""" + dielectric_value = None + if description is None: + return dielectric_value + + if "C0G" in description: + dielectric_value = "C0G" + elif "X7R" in description: + dielectric_value = "X7R" + elif "X5R" in description: + dielectric_value = "X5R" + elif "Y5V" in description: + dielectric_value = "Y5V" + + return dielectric_value + +def percent(description: str | None) -> float | None: + """Parse the percentage from a component description""" + if description is None: + return None + + for fragment in description.split(' '): + if fragment.endswith("%"): + while not fragment[0].isdigit(): + fragment = fragment[1:] + try: + return float(fragment[:-1]) + except ValueError: + pass + +def price(price_description: str) -> float | None: + """ + string input example: "'20-180:0.004285714,200-780:0.003485714,1600-9580:0.002771429,800-1580:0.003042857,9600-19980:0.002542857,20000-:0.002414286'" + output example: 0.004285714 + """ + if not price_description: + raise ValueError("Price is empty") + + price_groups = price_description.split(",") + return float(price_groups[0].split(":")[1]) diff --git a/jlcpcb_scraper/parsers/capacitance.py b/jlcpcb_scraper/parsers/capacitance.py deleted file mode 100644 index 1c84a5b..0000000 --- a/jlcpcb_scraper/parsers/capacitance.py +++ /dev/null @@ -1,63 +0,0 @@ -def parse_capacitance_description(component_description): - capacitance_value = None - if component_description is None: - return capacitance_value - description_parts = component_description.split(' ') - for part in description_parts: - if part.endswith("pF"): - numeric_part = part.rstrip("pF") - try: - value = float(numeric_part) - capacitance_value = value - break - except ValueError: - pass - elif part.endswith("nF"): - 
numeric_part = part.rstrip("nF") - try: - value = float(numeric_part) - capacitance_value = value * 1000.0 - break - except ValueError: - pass - elif part.endswith("uF") or part.endswith("µF"): - numeric_part = part.rstrip("uF").rstrip("µF") - try: - value = float(numeric_part) - capacitance_value = value * 1000000.0 - break - except ValueError: - pass - elif part.endswith("mF"): - numeric_part = part.rstrip("mF") - try: - value = float(numeric_part) - capacitance_value = value * 1000000000.0 - break - except ValueError: - pass - elif part.endswith("kF"): - numeric_part = part.rstrip("kF") - try: - value = float(numeric_part) - capacitance_value = value * 1000000000000000.0 - break - except ValueError: - pass - elif part.endswith("MF"): - numeric_part = part.rstrip("MF") - try: - value = float(numeric_part) - capacitance_value = value * 1000000000000000000.0 - break - except ValueError: - pass - elif part.endswith('F'): - numeric_part = part.rstrip('F') - try: - value = float(numeric_part) - capacitance_value = value * 1000000000000.0 - break - except ValueError: - pass - return capacitance_value \ No newline at end of file diff --git a/jlcpcb_scraper/parsers/current.py b/jlcpcb_scraper/parsers/current.py deleted file mode 100644 index 754a092..0000000 --- a/jlcpcb_scraper/parsers/current.py +++ /dev/null @@ -1,74 +0,0 @@ -def parse_current_description(component_description): - current_value = None - - if component_description is None: - return current_value - - description_parts = component_description.split(' ') - for part in description_parts: - if part.endswith("pA"): - numeric_part = part.rstrip("pA") - try: - value = float(numeric_part) - current_value = value - break - except ValueError: - pass - elif part.endswith("nA"): - numeric_part = part.rstrip("nA") - try: - value = float(numeric_part) - current_value = value * 1e3 - break - except ValueError: - pass - elif part.endswith("uA"): - numeric_part = part.rstrip("uA") - try: - value = float(numeric_part) - 
current_value = value * 1e6 - break - except ValueError: - pass - elif part.endswith("μA"): - numeric_part = part.rstrip("μA") - try: - value = float(numeric_part) - current_value = value * 1e6 - break - except ValueError: - pass - elif part.endswith("mA"): - numeric_part = part.rstrip("mA") - try: - value = float(numeric_part) - current_value = value * 1e9 - break - except ValueError: - pass - elif part.endswith("kA"): - numeric_part = part.rstrip("kA") - try: - value = float(numeric_part) - current_value = value * 1e15 - break - except ValueError: - pass - elif part.endswith("MA"): - numeric_part = part.rstrip("MA") - try: - value = float(numeric_part) - current_value = value * 1e18 - break - except ValueError: - pass - elif part.endswith('A'): - numeric_part = part.rstrip('A') - try: - value = float(numeric_part) - current_value = value * 1e12 - break - except ValueError: - pass - - return current_value \ No newline at end of file diff --git a/jlcpcb_scraper/parsers/dielectric.py b/jlcpcb_scraper/parsers/dielectric.py deleted file mode 100644 index 77ac46e..0000000 --- a/jlcpcb_scraper/parsers/dielectric.py +++ /dev/null @@ -1,15 +0,0 @@ -def parse_dielectric_description(component_description): - dielectric_value = None - if component_description is None: - return dielectric_value - - if "C0G" in component_description: - dielectric_value = "C0G" - elif "X7R" in component_description: - dielectric_value = "X7R" - elif "X5R" in component_description: - dielectric_value = "X5R" - elif "Y5V" in component_description: - dielectric_value = "Y5V" - - return dielectric_value \ No newline at end of file diff --git a/jlcpcb_scraper/parsers/inductance.py b/jlcpcb_scraper/parsers/inductance.py deleted file mode 100644 index 46d022e..0000000 --- a/jlcpcb_scraper/parsers/inductance.py +++ /dev/null @@ -1,71 +0,0 @@ -def parse_inductance_description(component_description): - inductance_value = None - if component_description is None: - return inductance_value - 
description_parts = component_description.split(' ') - for part in description_parts: - if part.endswith("pH"): - numeric_part = part.rstrip("pH") - try: - value = float(numeric_part) - inductance_value = value - break - except ValueError: - pass - elif part.endswith("nH"): - numeric_part = part.rstrip("nH") - try: - value = float(numeric_part) - inductance_value = value * 1_000 - break - except ValueError: - pass - elif part.endswith("uH"): - numeric_part = part.rstrip("uH") - try: - value = float(numeric_part) - inductance_value = value * 1_000_000 - break - except ValueError: - pass - elif part.endswith("µH"): - numeric_part = part.rstrip("µH") - try: - value = float(numeric_part) - inductance_value = value * 1_000_000 - break - except ValueError: - pass - elif part.endswith("mH"): - numeric_part = part.rstrip("mH") - try: - value = float(numeric_part) - inductance_value = value * 1_000_000_000 - break - except ValueError: - pass - elif part.endswith("kH"): - numeric_part = part.rstrip("kH") - try: - value = float(numeric_part) - inductance_value = value * 1_000_000_000_000 - break - except ValueError: - pass - elif part.endswith("MH"): - numeric_part = part.rstrip("MH") - try: - value = float(numeric_part) - inductance_value = value * 1_000_000_000_000_000 - break - except ValueError: - pass - elif part.endswith('H'): - numeric_part = part.rstrip('H') - try: - value = float(numeric_part) - inductance_value = value * 1_000_000_000_000_000_000 - break - except ValueError: - pass - return inductance_value \ No newline at end of file diff --git a/jlcpcb_scraper/parsers/resistance.py b/jlcpcb_scraper/parsers/resistance.py deleted file mode 100644 index 6907ec0..0000000 --- a/jlcpcb_scraper/parsers/resistance.py +++ /dev/null @@ -1,66 +0,0 @@ -def parse_resistance_description(component_description): - resistance_value = None - - if component_description is None: - return resistance_value - - description_parts = component_description.split(' ') - for part in 
description_parts: - if part.endswith("pΩ"): - numeric_part = part.rstrip("pΩ") - try: - value = float(numeric_part) - resistance_value = value * 1e-12 - break - except ValueError: - pass - elif part.endswith("nΩ"): - numeric_part = part.rstrip("nΩ") - try: - value = float(numeric_part) - resistance_value = value * 1e-9 - break - except ValueError: - pass - elif part.endswith("uΩ") or part.endswith("μΩ"): - numeric_part = part.rstrip("uΩ").rstrip("μΩ") - try: - value = float(numeric_part) - resistance_value = value * 1e-6 - break - except ValueError: - pass - elif part.endswith("mΩ"): - numeric_part = part.rstrip("mΩ") - try: - value = float(numeric_part) - resistance_value = value * 1e-3 - break - except ValueError: - pass - elif part.endswith("kΩ"): - numeric_part = part.rstrip("kΩ") - try: - value = float(numeric_part) - resistance_value = value * 1e3 - break - except ValueError: - pass - elif part.endswith("MΩ"): - numeric_part = part.rstrip("MΩ") - try: - value = float(numeric_part) - resistance_value = value * 1e6 - break - except ValueError: - pass - elif part.endswith('Ω'): - numeric_part = part.rstrip('Ω') - try: - value = float(numeric_part) - resistance_value = value - break - except ValueError: - pass - - return resistance_value \ No newline at end of file diff --git a/jlcpcb_scraper/parsers/voltage.py b/jlcpcb_scraper/parsers/voltage.py deleted file mode 100644 index 789785a..0000000 --- a/jlcpcb_scraper/parsers/voltage.py +++ /dev/null @@ -1,73 +0,0 @@ -def parse_voltage_description(component_description): - voltage_value = None - - if component_description is None: - return voltage_value - - description_parts = component_description.split(' ') - for part in description_parts: - if part.endswith("pV"): - numeric_part = part.rstrip("pV") - try: - value = float(numeric_part) - voltage_value = value - break - except ValueError: - pass - elif part.endswith("nV"): - numeric_part = part.rstrip("nV") - try: - value = float(numeric_part) - voltage_value = 
value * 1e3 - break - except ValueError: - pass - elif part.endswith("uV"): - numeric_part = part.rstrip("uV") - try: - value = float(numeric_part) - voltage_value = value * 1e6 - break - except ValueError: - pass - elif part.endswith("μV"): - numeric_part = part.rstrip("μV") - try: - value = float(numeric_part) - voltage_value = value * 1e6 - break - except ValueError: - pass - elif part.endswith("mV"): - numeric_part = part.rstrip("mV") - try: - value = float(numeric_part) - voltage_value = value * 1e9 - break - except ValueError: - pass - elif part.endswith("kV"): - numeric_part = part.rstrip("kV") - try: - value = float(numeric_part) - voltage_value = value * 1e15 - break - except ValueError: - pass - elif part.endswith("MV"): - numeric_part = part.rstrip("MV") - try: - value = float(numeric_part) - voltage_value = value * 1e18 - break - except ValueError: - pass - elif part.endswith('V'): - numeric_part = part.rstrip('V') - try: - value = float(numeric_part) - voltage_value = value * 1e12 - break - except ValueError: - pass - return voltage_value \ No newline at end of file From 5ca3440c0b282e188471f5cf9959cb0c43e84d61 Mon Sep 17 00:00:00 2001 From: Matthew Wildoer Date: Mon, 10 Jun 2024 16:07:46 -0700 Subject: [PATCH 02/11] Add tests for parsers. 
Remove tests for old scraper --- tests/test_parsers.py | 7 +++++++ tests/test_scraper.py | 13 ------------- 2 files changed, 7 insertions(+), 13 deletions(-) create mode 100644 tests/test_parsers.py delete mode 100644 tests/test_scraper.py diff --git a/tests/test_parsers.py b/tests/test_parsers.py new file mode 100644 index 0000000..1d483ea --- /dev/null +++ b/tests/test_parsers.py @@ -0,0 +1,7 @@ + +def test_voltage(): + from jlcpcb_scraper.parsers import voltage + assert voltage("10V") == 10 + assert voltage("10V 20V") == 10 + assert voltage("20V 10V") == 20 + assert voltage("432mV") == 0.432 diff --git a/tests/test_scraper.py b/tests/test_scraper.py deleted file mode 100644 index 9eac500..0000000 --- a/tests/test_scraper.py +++ /dev/null @@ -1,13 +0,0 @@ -import unittest -from jlcpcb_scraper.scraper import JlcpcbScraper - -class TestJlcpcbScraper(unittest.TestCase): - def setUp(self): - self.scraper = JlcpcbScraper() - - def test_get_parts(self): - # TODO - pass - -if __name__ == '__main__': - unittest.main() \ No newline at end of file From c85e0858ea8bec607207b5cb8a150b4211cf1b33 Mon Sep 17 00:00:00 2001 From: Matthew Wildoer Date: Mon, 10 Jun 2024 16:10:02 -0700 Subject: [PATCH 03/11] Make a scraper that yields relatively raw JLCPCB data --- jlcpcb_scraper/scraper.py | 227 ++++++++++++-------------------------- 1 file changed, 73 insertions(+), 154 deletions(-) diff --git a/jlcpcb_scraper/scraper.py b/jlcpcb_scraper/scraper.py index a1da9c8..93f3d80 100644 --- a/jlcpcb_scraper/scraper.py +++ b/jlcpcb_scraper/scraper.py @@ -2,14 +2,12 @@ import logging import time from datetime import datetime, timedelta -from dataclasses import dataclass, field import requests from requests.adapters import HTTPAdapter from fake_useragent import UserAgent -from bs4 import BeautifulSoup, SoupStrainer +from typing import Generator -from models import Part, Category, create_or_update_category, create_or_update_part logger = logging.getLogger(__name__) @@ -19,182 +17,103 @@ class 
JlcpcbScraper: - def __init__(self, base_url='https://jlcpcb.com/parts', categories: list[Category] = []): + def __init__( + self, + key: str | None = None, + secret: str | None = None, + ): + # Session configuration self.session = requests.Session() ua = UserAgent() - self.session.headers.update({ - "Accept": "*/*", - "Accept-Encoding": "gzip, deflate", - "Host": "jlcpcb.com", - "User-Agent": str(ua.chrome), - }) - self.session.mount('https://', HTTPAdapter(max_retries=3)) - self.base_url = base_url - self.session.get(self.base_url) - self.all_links = [] - self.categories: list[Category] = categories + self.session.headers.update( + { + "Accept": "*/*", + "Accept-Encoding": "gzip, deflate", + "Host": "jlcpcb.com", + "User-Agent": str(ua.chrome), + } + ) + self.session.mount("https://", HTTPAdapter(max_retries=3)) + + # State info + self.last_key = None + + # Token info self.token: str | None = None self.token_expires: datetime | None = None - self.key = JLCPCB_KEY - self.secret = JLCPCB_SECRET + self.key = key or JLCPCB_KEY + self.secret = secret or JLCPCB_SECRET self._obtain_token() - logger.info('JlcpcbScraper initialized') + + # Wew! 
+ logger.info("JlcpcbScraper initialized") def _obtain_token(self) -> None: if not self.key or not self.secret: - raise RuntimeError("JLCPCB_KEY and JLCPCB_SECRET environment variables must be set") - body = { - "appKey": self.key, - "appSecret": self.secret - } + raise RuntimeError( + "JLCPCB_KEY and JLCPCB_SECRET environment variables must be set" + ) + body = {"appKey": self.key, "appSecret": self.secret} headers = { "Content-Type": "application/json", } - resp = requests.post("https://jlcpcb.com/external/genToken", - json=body, headers=headers) + resp = requests.post( + "https://jlcpcb.com/external/genToken", + json=body, + headers=headers, + timeout=30, + ) + if resp.status_code != 200: raise RuntimeError(f"Cannot obtain token {resp.json()}") data = resp.json() if data["code"] != 200: raise RuntimeError(f"Cannot obtain token {data}") + self.token = data["data"] - self.session.headers.update({ - "externalApiToken": self.token, - }) + self.session.headers.update( + { + "externalApiToken": self.token, + } + ) self.token_expires = datetime.now() + timedelta(seconds=1800) - def get_all_links(self): - response = self.session.get(self.base_url+'/all-electronic-components') - soup = BeautifulSoup(response.text, 'html.parser') - for link in soup.find_all('a', href=True): - if '/parts/1st/' in link['href'] or '/parts/2nd/' in link['href']: - self.all_links.append(link['href']) - logger.info('All links fetched') - - def extract_categories(self, session, response): - new_categories = [] - for component in response.get('data', {}).get('componentInfos', []): - category_name = component.get('firstCategory') - subcategory_name = component.get('secondCategory') - if not self.category_exists(subcategory_name): - new_category = Category(name=category_name, subcategory_name=subcategory_name) - category = create_or_update_category(session, new_category) - if category not in self.categories: - new_categories.append(new_category) - self.categories.append(new_category) - yield [], 
new_categories - - def get_parts(self, session): - # first query - response = self.session.post('https://jlcpcb.com/external/component/getComponentInfos') - if response.status_code != 200: - logger.error(f"Cannot obtain parts {response.json()}") - raise RuntimeError(f"Cannot obtain parts {response.json()}") - response = response.json() - if not response.get("code") == 200: - logger.error(f"Cannot obtain parts {response}") - raise RuntimeError(f"Cannot obtain parts {response}") - if not response.get('data', {}).get('componentInfos', []): - yield None, None - return - yield from self.extract_categories(session, response) - self.parse_pagination(response) - yield from self.parse_parts(session, response) - # subsequent page queries - request_count = 1 - while response['data']['componentInfos']: - time.sleep(0.2) - logger.info(f'Fetching page {request_count}') - request_count + 1 - response = self.session.post('https://jlcpcb.com/external/component/getComponentInfos', data={"lastKey": self.last_key}) + def get_parts(self) -> Generator[dict, None, None]: + request_count = 0 + while True: + logger.info("Fetching page %s", request_count) + request_count += 1 + response = self.session.post( + "https://jlcpcb.com/external/component/getComponentInfos", + data={"lastKey": self.last_key} if self.last_key else None, + ) if response.status_code != 200: - logger.error(f"Cannot obtain parts, status code not 200: {response}") - yield None, None + logger.error("Cannot obtain parts, status code not 200: %s", response) return - response = response.json() - if not response.get("code") == 200: - logger.error(f"Cannot obtain parts, internal status code not 200: {response}") - yield None, None + + response_data: dict = response.json() + if not response_data.get("code") == 200: + logger.error( + "Cannot obtain parts, internal status code not 200: %s", + response_data, + ) + return + + if not response_data.get("data", {}).get("componentInfos"): + logger.info("No more parts to fetch") return 
- if response.get('data', {}).get('componentInfos', []) is None: - yield None, None - logger.info('No more parts to fetch') - yield from self.extract_categories(session, response) - self.parse_pagination(response) - yield from self.parse_parts(session, response) + + self._parse_pagination(response_data) + + yield from response_data["data"]["componentInfos"] + if self.token_expires < datetime.now(): self._obtain_token() - def parse_parts(self, session, response): - parts = response['data']['componentInfos'] - all_categories = [] - all_parts = [] - for part in parts: - if part['stock'] == 0: - continue - part_subcategory = part['secondCategory'] - if not self.category_exists(part_subcategory): - new_category = Category(name=part['firstCategory'], subcategory_name=part_subcategory) - category = create_or_update_category(session, new_category) - if category not in self.categories: - all_categories.append(new_category) - self.categories.append(new_category) - subcategory_id = self.get_category(part_subcategory).id - part_price = self.get_part_price(part['price']) - part_instance = Part( - lcsc=part['lcscPart'], - category_id=subcategory_id, - mfr=part['mfrPart'], - package=part['package'], - joints=int(part['solderJoint']), - manufacturer=part['manufacturer'], - basic=part['libraryType'] == 'base', - description=part['description'], - datasheet=part['datasheet'], - stock=int(part['stock']), - price=part_price, - last_update=datetime.now() # Replace with the actual value based on your logic - ) - all_parts.append(part_instance) - create_or_update_part(session, part_instance) - - # Add parts to subcategories - yield all_parts, all_categories + # Delay to avoid overwhelming the server + time.sleep(0.2) - def parse_pagination(self, response): - self.last_key = response.get('data', {}).get('lastKey', None) + def _parse_pagination(self, response): + self.last_key = response.get("data", {}).get("lastKey", None) if not self.last_key: raise RuntimeError("Cannot obtain last key") 
- - def get_part_price(self, price: str) -> float | None: - ''' - string input example: "'20-180:0.004285714,200-780:0.003485714,1600-9580:0.002771429,800-1580:0.003042857,9600-19980:0.002542857,20000-:0.002414286'" - output example: 0.004285714 - ''' - try: - if not price: - return None - price = price.split(',') - price = price[0].split(':') - return float(price[1]) - except Exception as e: - logger.error(f'Error parsing price: {e}') - return None - - def category_exists(self, subcategory_name: str) -> bool: - return any([category.subcategory_name == subcategory_name for category in self.categories]) - - def get_category(self, subcategory_name: str) -> Category | None: - return next((category for category in self.categories if category.subcategory_name == subcategory_name), None) - - def get_category_by_id(self, category_id: int) -> Category | None: - return next((category for category in self.categories if category.id == category_id), None) - - -if __name__ == '__main__': - scraper = JlcpcbScraper() - scraper.get_parts() - # save to pkl file - import pickle - with open('jlcpcb.pkl', 'wb') as f: - pickle.dump(scraper.categories, f) \ No newline at end of file From d2407b70a10961e3e2a390ed1fc4d8cce948a4fd Mon Sep 17 00:00:00 2001 From: Matthew Wildoer Date: Mon, 10 Jun 2024 16:12:11 -0700 Subject: [PATCH 04/11] Add standard Python.gitignore + example sqlite db --- .gitignore | 172 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 171 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index a907abf..a59e9d1 100644 --- a/.gitignore +++ b/.gitignore @@ -25,4 +25,174 @@ node_modules/ *.egg-info/ # Ignore virtual environment -venv/ \ No newline at end of file +venv/ + +##### Python.gitignore ##### + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ 
+wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. 
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + + +#### Additional things #### +# sqlite databases +example.db From 0f0b2b8eff89df9cd138df29f132344e595f7737 Mon Sep 17 00:00:00 2001 From: Matthew Wildoer Date: Tue, 9 Jul 2024 17:53:36 -0700 Subject: [PATCH 05/11] Checkpoint of everything generally working --- jlcpcb_scraper/Untitled-1.py | 73 ++++++++++ jlcpcb_scraper/base_class.py | 43 ------ jlcpcb_scraper/config.py | 8 +- jlcpcb_scraper/do_configure.py | 18 +++ jlcpcb_scraper/factory.py | 248 +++++++++++++++++++++++++++++++++ jlcpcb_scraper/main.py | 78 ++++++----- jlcpcb_scraper/models.py | 134 +++++++++--------- jlcpcb_scraper/parsers.py | 18 +-- requirements.txt | 4 +- tests/test_parsers.py | 9 ++ 10 files changed, 466 insertions(+), 167 deletions(-) create mode 100644 jlcpcb_scraper/Untitled-1.py delete mode 100755 jlcpcb_scraper/base_class.py create mode 100644 jlcpcb_scraper/do_configure.py create mode 100644 jlcpcb_scraper/factory.py diff --git a/jlcpcb_scraper/Untitled-1.py b/jlcpcb_scraper/Untitled-1.py new file mode 100644 index 0000000..d340c20 --- /dev/null +++ b/jlcpcb_scraper/Untitled-1.py @@ -0,0 +1,73 @@ +# %% +try: + import IPython +except ImportError: + print("IPython not found") +else: + ipython = IPython.get_ipython() + ipython.magic("load_ext autoreload") + ipython.magic("autoreload 2") + print("Enabled autoreload") + + +import logging +from jlcpcb_scraper.scraper import JlcpcbScraper + +# %% +logging.basicConfig(level=logging.DEBUG) +logging.info("Starting JLCPCB scraper") + + +JLCPCB_KEY="app_key4699520" +JLCPCB_SECRET="app_secret4699520" + +scraper = JlcpcbScraper( + key=JLCPCB_KEY, + secret=JLCPCB_SECRET +) + +# %% +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from jlcpcb_scraper.models import Base, create_or_update_part + +# Create an SQLite database and a session +engine = create_engine('sqlite:///example.db') +Base.metadata.create_all(engine) + +Session = sessionmaker(bind=engine) +session = Session() + +# %% +from 
jlcpcb_scraper.model_factory import process +first_2000_parts = [] + +for i, part_data in enumerate(scraper.get_parts()): + if i > 2000: + break + first_2000_parts.append(part_data) + +# %% +for i, part_data in enumerate(first_2000_parts): + logging.debug(f"Processing part {i}") + if part := await process(part_data): + logging.debug(f"Part {i} accepted") + create_or_update_part(session, part) + +session.commit() + +# %% +from jlcpcb_scraper.models import Resistor, Capacitor + +for i, p in enumerate(session.query(Resistor).all()): + print(p.__dict__) + + if i > 100: + break + +for i, p in enumerate(session.query(Capacitor).all()): + print(p.__dict__) + + if i > 100: + break +# %% diff --git a/jlcpcb_scraper/base_class.py b/jlcpcb_scraper/base_class.py deleted file mode 100755 index 7a185d4..0000000 --- a/jlcpcb_scraper/base_class.py +++ /dev/null @@ -1,43 +0,0 @@ -from typing import Any - -from sqlalchemy.orm import Session, relationship -from sqlalchemy.inspection import inspect -from sqlalchemy.orm.attributes import InstrumentedAttribute -from sqlalchemy.ext.declarative import as_declarative, declared_attr - - -@as_declarative() -class Base: - id: Any - __name__: str - # Generate __tablename__ automatically - @declared_attr - def __tablename__(cls) -> str: - return cls.__name__.lower() - - @classmethod - def get_writeable_properties(cls): - '''Return a dict because a lookup is O(1) in a hashmap and O(n) in a list''' - relationships = { x : None for x in inspect(cls).relationships.keys() } - return { - attr : None for attr, value in vars(cls).items() if - attr not in relationships and - ((isinstance(value, property) and value.fset is not None) or isinstance(value, InstrumentedAttribute)) - - } - - @classmethod - def approx_rowcount(cls, db : Session) -> int: - result = db.execute(f"""SELECT - (reltuples/relpages) * ( - pg_relation_size('{cls.__tablename__}') / - (current_setting('block_size')::integer) - ) - FROM pg_class where relname = '{cls.__tablename__}';""" - 
).scalar() - return int(result) - - def as_dict(self) -> dict: - return { - col.name: getattr(self, col.name) for col in self.__table__.columns - } diff --git a/jlcpcb_scraper/config.py b/jlcpcb_scraper/config.py index 46e8a6a..d94c717 100644 --- a/jlcpcb_scraper/config.py +++ b/jlcpcb_scraper/config.py @@ -7,5 +7,11 @@ def __init__(self): # Define your variables here self.SQLALCHEMY_DATABASE_URI = os.getenv('SQLALCHEMY_DATABASE_URI') + self.POSTGRES_USER = os.getenv('POSTGRES_USER') + self.POSTGRES_PASSWORD = os.getenv('POSTGRES_PASSWORD') + self.POSTGRES_DB = os.getenv('POSTGRES_DB') + self.JLCPCB_KEY = os.getenv('JLCPCB_KEY') + self.JLCPCB_SECRET = os.getenv('JLCPCB_SECRET') -config = Config() \ No newline at end of file + +config = Config() diff --git a/jlcpcb_scraper/do_configure.py b/jlcpcb_scraper/do_configure.py new file mode 100644 index 0000000..aaf8aa7 --- /dev/null +++ b/jlcpcb_scraper/do_configure.py @@ -0,0 +1,18 @@ +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from alembic.config import Config +from alembic import command + +from config import config + +# 1. Run Alembic automatic revisions +alembic_cfg = Config("alembic.ini") +command.revision(alembic_cfg, autogenerate=True, message="Automatic revisions") + +# 2. Run Alembic migrations on startup +command.upgrade(alembic_cfg, "head") + +# 3. 
Get all current Category models from the database with sqlalchemy +engine = create_engine(config.SQLALCHEMY_DATABASE_URI) +Session = sessionmaker(bind=engine) +session = Session() diff --git a/jlcpcb_scraper/factory.py b/jlcpcb_scraper/factory.py new file mode 100644 index 0000000..174d3c9 --- /dev/null +++ b/jlcpcb_scraper/factory.py @@ -0,0 +1,248 @@ +""" +Create JLCPCB models +""" + +import datetime +import logging +from typing import Type, TypeVar + +from jlcpcb_scraper import parsers +from jlcpcb_scraper.models import Capacitor, Part, Resistor + +T = TypeVar("T", bound="AbstractModelFactory") + +log = logging.getLogger(__name__) + + +# Example data: +# { +# 'lcscPart': 'C1002', +# 'firstCategory': 'Filters', +# 'secondCategory': 'Ferrite Beads', +# 'mfrPart': 'GZ1608D601TF', +# 'solderJoint': '2', +# 'manufacturer': 'Sunlord', +# 'libraryType': 'base', +# 'description': '450mΩ ±25% 600Ω@100MHz 0603 Ferrite Beads ROHS', +# 'datasheet': 'https://wmsc.lcsc.com/wmsc/upload/file/pdf/v2/lcsc/2310301640_Sunlord-GZ1608D601TF_C1002.pdf', +# 'price': '20-180:0.004285714,200-780:0.003485714,1600-9580:0.002771429,800-1580:0.003042857,9600-19980:0.002542857,20000-:0.002414286', +# 'stock': 433437, +# 'package': '0603' +# } + +# _known_footprints = { +# # "01005": "C01005", +# # "0201": "C0201", +# # "0402": "C0402", +# # "0504": "C0504", +# # "0603": "C0603", +# # "0805": "C0805", +# # "1206": "C1206", +# # "1210": "C1210", +# # "1812": "C1812", +# # "1825": "C1825", +# # "2220": "C2220", +# # "2225": "C2225", +# # "3640": "C3640", +# # "0201": "L0201", +# # "0402": "L0402", +# # "0603": "L0603", +# # "0805": "L0805", +# # "1206": "L1206", +# # "1210": "L1210", +# # "1806": "L1806", +# # "1812": "L1812", +# # "2010": "L2010", +# # "2512": "L2512", +# "01005": "R01005", +# "0201": "R0201", +# "0402": "R0402", +# "0603": "R0603", +# "0805": "R0805", +# "1206": "R1206", +# "1210": "R1210", +# "2512": "R2512", +# } + +class AbstractModelFactory: + factories: 
list[Type["AbstractModelFactory"]] = [] + + @classmethod + def for_me(self, data: dict) -> bool: + """Check if the data is for this factory""" + raise NotImplementedError + + async def build(self, data: dict) -> Part | None: + """Build a model from the data""" + raise NotImplementedError + + @classmethod + def register(cls) -> Type[T]: + return cls.factories.append(cls) + + def _get_common(self, data: dict) -> dict | None: + # FIXME: these are really daft criteria for the rating + price = parsers.price(data.get("price")) + stock = int(data.get("stock", 0)) + stock_cost = 20 if stock < 50 else 0 + rating = price + stock_cost + + return { + "price": price, + "stock": stock, + "overhead_cost": 0, # FIXME: we should find this data somewhere + "rating": rating, + "lcsc_id": data.get("lcscPart"), + "mpn": data.get("mfrPart"), + "package": data.get("package"), + "last_update": datetime.datetime.now(datetime.UTC), + # footprint_name isn't common because package -> footprint map + # is different for each component type + } + + +class ResistorFactory(AbstractModelFactory): + _known_footprints = { + "01005": "R01005", + "0201": "R0201", + "0402": "R0402", + "0603": "R0603", + "0805": "R0805", + "1206": "R1206", + "1210": "R1210", + "2512": "R2512", + } + + @classmethod + def for_me(cls, data: dict) -> bool: + return data.get("firstCategory") == "Resistors" + + async def build(self, data: dict) -> Resistor | None: + nominal_resistance = parsers.resistance(data.get("description")) + if not nominal_resistance: + # Handle both zero and None + log.debug("Rejected because resistance couldn't be found") + return + tolerance_pct = parsers.percent(data.get("description")) + if tolerance_pct is None: + log.debug("Rejected because tolerance couldn't be found") + return + resistance_ohms_min = nominal_resistance * (1 - tolerance_pct / 100) + resistance_ohms_max = nominal_resistance * (1 + tolerance_pct / 100) + + common = self._get_common(data) + if not common: + log.debug("Rejected 
because common data couldn't be found") + return + + return Resistor( + footprint_name = self._known_footprints.get(data.get("package")), + resistance_ohms_min = resistance_ohms_min, + resistance_ohms_max = resistance_ohms_max, + rated_power_watts = parsers.power(data.get("description")), + operating_temp_celsius_min = None, # TODO: + operating_temp_celsius_max = None, # TODO: + **common + ) + + +ResistorFactory.register() + + +class CapacitorFactory(AbstractModelFactory): + _known_footprints = { + "01005": "C01005", + "0201": "C0201", + "0402": "C0402", + "0504": "C0504", + "0603": "C0603", + "0805": "C0805", + "1206": "C1206", + "1210": "C1210", + "1812": "C1812", + "1825": "C1825", + "2220": "C2220", + "2225": "C2225", + "3640": "C3640", + } + + dielectric_min_temp = { + "X": -55, + "Y": -30, + "Z": 10, + } + + dielectric_max_temp = { + "4": 65, + "5": 85, + "6": 105, + "7": 125, + "8": 150, + "9": 200, + } + + @classmethod + def for_me(cls, data: dict) -> bool: + return data.get("firstCategory") == "Capacitors" + + async def build(self, data: dict) -> Capacitor | None: + nominal_capacitance = parsers.capacitance(data.get("description")) + if not nominal_capacitance: + # Handle both zero and None + log.debug("Rejected because capacitance couldn't be found") + return + tolerance_pct = parsers.percent(data.get("description")) + if tolerance_pct is None: + log.debug("Rejected because tolerance couldn't be found") + return + capacitance_farads_min = nominal_capacitance * (1 - tolerance_pct / 100) + capacitance_farads_max = nominal_capacitance * (1 + tolerance_pct / 100) + + rated_voltage_volts = parsers.voltage(data.get("description")) + + dielectric_code = parsers.dielectric(data.get("description")) + if dielectric_code: + operating_temp_celsius_min = self.dielectric_min_temp.get(dielectric_code[0]) + operating_temp_celsius_max = self.dielectric_max_temp.get(dielectric_code[1]) + else: + operating_temp_celsius_min = None + operating_temp_celsius_max = None + + 
common = self._get_common(data) + if not common: + log.debug("Rejected because common data couldn't be found") + return + + return Capacitor( + footprint_name = self._known_footprints.get(data.get("package")), + capacitance_farads_min=capacitance_farads_min, + capacitance_farads_max=capacitance_farads_max, + rated_voltage_volts=rated_voltage_volts, + operating_temp_celsius_min=operating_temp_celsius_min, + operating_temp_celsius_max=operating_temp_celsius_max, + dielectric_code=dielectric_code, + **common + ) + + +CapacitorFactory.register() + + +async def process(data: dict) -> Part | None: + for factory in AbstractModelFactory.factories: + if factory.for_me(data): + log.debug( + "%s accepted data in category %s-%s", + factory.__class__.__name__, + data.get("firstCategory"), + data.get("secondCategory"), + ) + + component = await factory().build(data) + if component: + log.debug("Built %s", component) + else: + log.debug("Rejected") + + # Always return after the first factory that accepts the data + return component diff --git a/jlcpcb_scraper/main.py b/jlcpcb_scraper/main.py index 07d2461..7dcc85f 100644 --- a/jlcpcb_scraper/main.py +++ b/jlcpcb_scraper/main.py @@ -1,70 +1,72 @@ -from datetime import datetime, timedelta +# %% +# 0. 
Enable autoreload for development and do imports +try: + import IPython +except ImportError: + print("IPython not found") +else: + if ipython := IPython.get_ipython(): + ipython.run_line_magic("load_ext autoreload") + ipython.run_line_magic("autoreload 2") + print("Enabled autoreload") + +import asyncio +import logging +from datetime import datetime, timedelta, UTC from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker -from alembic.config import Config -from alembic import command -from parsers import resistance, capacitance, inductance, voltage, current, dielectric -from scraper import JlcpcbScraper -from models import Category, Part -from config import config +from alembic import command +from alembic.config import Config +from jlcpcb_scraper.config import config +from jlcpcb_scraper.models import Part, create_or_update_part +from jlcpcb_scraper.model_factory import process +from jlcpcb_scraper.scraper import JlcpcbScraper +logging.basicConfig(level=logging.DEBUG) +log = logging.getLogger(__name__) +logging.info("Starting scraper") +# %% # 1. Run Alembic automatic revisions alembic_cfg = Config("alembic.ini") -# command.revision(alembic_cfg, autogenerate=True, message="Automatic revisions") -# command.upgrade(alembic_cfg, "head") # 2. Run Alembic migrations on startup +command.revision(alembic_cfg, autogenerate=True, message="Automatic revisions") command.upgrade(alembic_cfg, "head") +# %% # 3. Get all current Category models from the database with sqlalchemy engine = create_engine(config.SQLALCHEMY_DATABASE_URI) Session = sessionmaker(bind=engine) session = Session() -categories = session.query(Category).all() - # 4. Initialize JLCPCB scraper with the current category models -scraper = JlcpcbScraper(categories=categories) +scraper = JlcpcbScraper(config.JLCPCB_KEY, config.JLCPCB_SECRET) +# $$ # 5. 
Start scraping parts and update the database with new categories and parts -part_count = 0 -categories_count = 0 -for new_parts, new_categories in scraper.get_parts(session): - # stop if no new parts or categories are found - if new_parts is None and new_categories is None: - break - part_count += len(new_parts) - categories_count += len(new_categories) - session.commit() +for i, part_data in enumerate(scraper.get_parts()): + log.debug("Processing part %s", i) + + if part := asyncio.run(process(part_data)): + log.debug("Part %s accepted", i) + create_or_update_part(session, part) -print(f"Added { part_count } new parts and { categories_count } new categories to the database") + if i % 1000 == 0: + log.info("Committing changes for %s parts to the database", i) + session.commit() +# %% # 6. Remove Parts older than 30 days from the database print("Removing old parts from the database") -old_parts = session.query(Part).filter(Part.last_update < datetime.utcnow() - timedelta(days=30)).all() +old_parts = session.query(Part).filter(Part.last_update < datetime.now(UTC) - timedelta(days=30)).all() for part in old_parts: session.delete(part) session.commit() print(f"Removed { len(old_parts) } old parts from the database") -# 7. Iterate over all parts with a chuncked cursor -print("Parsing part descriptions") -for part in session.query(Part).yield_per(1000): - # 8. 
Parse the description and update the part with the parsed values - part.resistance = resistance.parse_resistance_description(part.description) - part.capacitance = capacitance.parse_capacitance_description(part.description) - part.inductance = inductance.parse_inductance_description(part.description) - part.voltage = voltage.parse_voltage_description(part.description) - part.current = current.parse_current_description(part.description) - part.dielectric = dielectric.parse_dielectric_description(part.description) - session.add(part) -session.commit() -print("Parsed part descriptions") - - # Save changes to the database session.commit() session.close() \ No newline at end of file diff --git a/jlcpcb_scraper/models.py b/jlcpcb_scraper/models.py index eef792f..6a5fe9d 100644 --- a/jlcpcb_scraper/models.py +++ b/jlcpcb_scraper/models.py @@ -1,79 +1,75 @@ -from sqlalchemy import Column, Integer, String, Boolean, Float, DateTime, ForeignKey -from sqlalchemy.dialects.postgresql import insert -from sqlalchemy.orm import relationship, Session -from sqlalchemy.ext.declarative import declarative_base import datetime +import operator + +from sqlalchemy import Column, DateTime, Float, Integer, String +from sqlalchemy.dialects.postgresql import insert +from sqlalchemy.orm import Mapped, Session, declarative_base Base = declarative_base() + +def in_(x: Column, y): + return x.in_(y) + + class Part(Base): - __tablename__ = 'parts' - - id = Column(Integer, primary_key=True, autoincrement=True) - lcsc = Column(String, index=True, unique=True) - category_id = Column(Integer, ForeignKey('categories.id')) - mfr = Column(String) - package = Column(String) - joints = Column(Integer) - manufacturer = Column(String) - basic = Column(Boolean) - description = Column(String) - datasheet = Column(String) - stock = Column(Integer) - price = Column(Float) - last_update = Column(DateTime, default=datetime.datetime.utcnow) - resistance = Column(Float, nullable=True, index=True) - inductance = 
Column(Float, nullable=True, index=True) - capacitance = Column(Float, nullable=True, index=True) - dielectric = Column(String, nullable=True, index=True) - current = Column(Float, nullable=True, index=True) - voltage = Column(Float, nullable=True, index=True) - -def create_or_update_part(session: Session, part: Part): - stmt = insert(Part).values( - lcsc=part.lcsc, - category_id=part.category_id, - mfr=part.mfr, - package=part.package, - joints=part.joints, - manufacturer=part.manufacturer, - basic=part.basic, - description=part.description, - datasheet=part.datasheet, - stock=part.stock, - price=part.price, - last_update=part.last_update - ).on_conflict_do_update( - index_elements=['lcsc'], + """Abstract base of a part.""" + __abstract__ = True + + # Normal fields + id = Column(Integer, primary_key=True) + last_update = Column(DateTime, default=datetime.datetime.now(datetime.UTC)) + + # Things we need to commonly update + price : Mapped[float] = Column(Float) + stock : Mapped[int] = Column(Integer) + overhead_cost : Mapped[float] = Column(Float) # The cost of the setup, handling and shipping etc... as overhead of using this SKU + rating : Mapped[str] = Column(Integer) # Rating is a magic number representing stock, basic part status and cost. The higher the better. 
+ + lcsc_id : Mapped[str] = Column(String, info={"return": True}, unique=True) + mpn : Mapped[str] = Column(String, info={"return": True, "query_operator": in_}) + + package : Mapped[str] = Column(String, info={"return": True, "query_operator": in_}, nullable=True) + footprint_name : Mapped[str] = Column(String, info={"return": True}) + + +class Resistor(Part): + """A model for a resistor part.""" + __tablename__ = "resistors" + + resistance_ohms_min : Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + resistance_ohms_max : Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + rated_power_watts : Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + operating_temp_celsius_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + operating_temp_celsius_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + + +class Capacitor(Part): + """A model for a capacitor part.""" + __tablename__ = "capacitors" + + capacitance_farads_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + capacitance_farads_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + rated_voltage_volts : Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + + # Dielectric type information: rated temperature range and temperature variation + # https://blog.knowlescapacitors.com/blog/simplify-capacitor-dielectric-selection-by-understanding-dielectric-coding-methods + operating_temp_celsius_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + operating_temp_celsius_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) 
+ # TODO: figure out how we can encode tolerance based on temperature variation + dielectric_code : Mapped[str] = Column(String) + + +def create_or_update_part(session: Session, comp: Part) -> Part: + d = {k: v for k, v in comp.__dict__.items() if k in comp.__table__.columns.keys() and k != 'id'} + stmt = insert(comp.__class__).values(**d).on_conflict_do_update( + index_elements=['lcsc_id'], set_={ - 'price': part.price, - 'stock': part.stock, - 'last_update': part.last_update + 'price': comp.price, + 'stock': comp.stock, + 'last_update': comp.last_update } ) session.execute(stmt) - return part - - -class Category(Base): - __tablename__ = 'categories' - - id = Column(Integer, primary_key=True, autoincrement=True) - name = Column(String) - subcategory_name = Column(String) - parts = relationship('Part', backref='category') - - @property - def component_count(self): - return sum([subclass.component_count for subclass in self.subclasses]) - -def create_or_update_category(session: Session, category: Category): - existing_category = session.query(Category).filter_by(name=category.subcategory_name).first() - if existing_category: - existing_category.subcategory_name = category.subcategory_name - return existing_category - else: - category.id = session.add(category) - session.commit() - return category \ No newline at end of file + return comp diff --git a/jlcpcb_scraper/parsers.py b/jlcpcb_scraper/parsers.py index b078b6b..46a9f98 100644 --- a/jlcpcb_scraper/parsers.py +++ b/jlcpcb_scraper/parsers.py @@ -1,3 +1,4 @@ +import re from functools import partial _multipliers = { @@ -46,20 +47,9 @@ def _parse(unit: str, description: str) -> float | None: def dielectric(description: str) -> str | None: """Parse the dielectric from a component description""" - dielectric_value = None - if description is None: - return dielectric_value - - if "C0G" in description: - dielectric_value = "C0G" - elif "X7R" in description: - dielectric_value = "X7R" - elif "X5R" in description: - 
dielectric_value = "X5R" - elif "Y5V" in description: - dielectric_value = "Y5V" - - return dielectric_value + dielectric_code = re.compile(r"\b[XYZ][4-9][PRLSTUV]\b") + dielectric_value = dielectric_code.search(description) + return dielectric_value.group() if dielectric_value else None def percent(description: str | None) -> float | None: """Parse the percentage from a component description""" diff --git a/requirements.txt b/requirements.txt index 17dc56a..5ea8acf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,5 +5,5 @@ fake-useragent==1.4.0 sqlalchemy==2.0.27 python-dotenv==1.0.1 alembic==1.13.1 -psycopg==3.1.18 -psycopg2-binary==2.9.8 \ No newline at end of file +psycopg==3.1.19 +psycopg-binary==3.1.19 diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 1d483ea..93903ed 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -5,3 +5,12 @@ def test_voltage(): assert voltage("10V 20V") == 10 assert voltage("20V 10V") == 20 assert voltage("432mV") == 0.432 + + +def test_dielectric(): + from jlcpcb_scraper.parsers import dielectric + assert dielectric("asdasd asdas X5P") == "X5P" + assert dielectric("12312kjnkj123 X7R") == "X7R" + assert dielectric("X7R 10%") == "X7R" + assert dielectric("X7R 10% 20V") == "X7R" + assert dielectric("asdasX7R 10% 20V") is None From bb54f2e640f41cc39491715859a81c71dd104009 Mon Sep 17 00:00:00 2001 From: Matthew Wildoer Date: Wed, 10 Jul 2024 12:32:45 -0700 Subject: [PATCH 06/11] Parse resistors and caps --- README.md | 25 ++++++- alembic.ini | 9 ++- alembic/versions/1b3360ddd371_.py | 53 -------------- alembic/versions/919647924b0b_.py | 52 ------------- .../versions/c041226a1f05_initial_commit.py | 69 ++++++++++++++++++ alembic/versions/f2a507ac545d_.py | 30 -------- jlcpcb_scraper/Untitled-1.py | 73 ------------------- jlcpcb_scraper/deps.py | 33 --------- jlcpcb_scraper/do_configure.py | 18 ----- jlcpcb_scraper/factory.py | 16 ++-- jlcpcb_scraper/main.py | 42 +++++------ jlcpcb_scraper/parsers.py 
| 9 ++- jlcpcb_scraper/scraper.py | 20 ++--- 13 files changed, 147 insertions(+), 302 deletions(-) delete mode 100644 alembic/versions/1b3360ddd371_.py delete mode 100644 alembic/versions/919647924b0b_.py create mode 100644 alembic/versions/c041226a1f05_initial_commit.py delete mode 100644 alembic/versions/f2a507ac545d_.py delete mode 100644 jlcpcb_scraper/Untitled-1.py delete mode 100755 jlcpcb_scraper/deps.py delete mode 100644 jlcpcb_scraper/do_configure.py diff --git a/README.md b/README.md index 5ec2722..f3085b2 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,9 @@ pip install -r requirements.txt ## Usage +Fill in the details in the .env file + +### OR Create a Postgres DB and make sure it is reachable by the pc that will execute this script. Set environment variable SQLALCHEMY_DATABASE_URI, JLCPCB_KEY and JLCPCB_SECRET @@ -24,7 +27,7 @@ export JLCPCB_SECRET= Execute main.py ``` python3 jlcpcb_scraper/main.py -``` +``` ## Testing TODO, INCOMPLETE @@ -43,4 +46,22 @@ Please make sure to update tests as appropriate. ## License -[MIT](https://choosealicense.com/licenses/mit/) \ No newline at end of file +[MIT](https://choosealicense.com/licenses/mit/) + +## Creating a DB + +You need Postgres running; eg. `docker up` from the component server project + +Alembic won't create a DB, you need to do that manually from Postgres Admin or the likes. Name is `atopile-components` or something like that. + +I've had issues with alembic creating tables from scratch? Perhaps I had things in a dirty state. + +```python +from sqlalchemy import create_engine + +from jlcpcb_scraper.config import config +from jlcpcb_scraper.models import Base + +engine = create_engine(config.SQLALCHEMY_DATABASE_URI) +Base.metadata.create_all(engine) +``` diff --git a/alembic.ini b/alembic.ini index 282a900..8db3b4b 100644 --- a/alembic.ini +++ b/alembic.ini @@ -82,7 +82,7 @@ version_path_separator = os # Use os.pathsep. 
Default configuration used for ne # Logging configuration [loggers] -keys = root,sqlalchemy,alembic +keys = root,sqlalchemy,alembic,jlcpcb_scraper [handlers] keys = console @@ -91,7 +91,7 @@ keys = console keys = generic [logger_root] -level = WARN +level = DEBUG handlers = console qualname = @@ -105,6 +105,11 @@ level = INFO handlers = qualname = alembic +[logger_jlcpcb_scraper] +level = INFO +handlers = +qualname = jlcpcb_scraper + [handler_console] class = StreamHandler args = (sys.stderr,) diff --git a/alembic/versions/1b3360ddd371_.py b/alembic/versions/1b3360ddd371_.py deleted file mode 100644 index 9b2d977..0000000 --- a/alembic/versions/1b3360ddd371_.py +++ /dev/null @@ -1,53 +0,0 @@ -"""empty message - -Revision ID: 1b3360ddd371 -Revises: -Create Date: 2024-02-24 16:09:53.969174 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision: str = '1b3360ddd371' -down_revision: Union[str, None] = None -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! 
### - op.create_table('categories', - sa.Column('id', sa.Integer(), autoincrement=True, nullable=False), - sa.Column('name', sa.String(), nullable=True), - sa.Column('subcategory_name', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - op.create_table('parts', - sa.Column('id', sa.Integer(), autoincrement=True, nullable=False), - sa.Column('lcsc', sa.String(), nullable=True), - sa.Column('category_id', sa.Integer(), nullable=True), - sa.Column('mfr', sa.String(), nullable=True), - sa.Column('package', sa.String(), nullable=True), - sa.Column('joints', sa.Integer(), nullable=True), - sa.Column('manufacturer', sa.String(), nullable=True), - sa.Column('basic', sa.Boolean(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('datasheet', sa.String(), nullable=True), - sa.Column('stock', sa.Integer(), nullable=True), - sa.Column('price', sa.Float(), nullable=True), - sa.Column('last_update', sa.DateTime(), nullable=True), - sa.ForeignKeyConstraint(['category_id'], ['categories.id'], ), - sa.PrimaryKeyConstraint('id') - ) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('parts') - op.drop_table('categories') - # ### end Alembic commands ### diff --git a/alembic/versions/919647924b0b_.py b/alembic/versions/919647924b0b_.py deleted file mode 100644 index 78f45ed..0000000 --- a/alembic/versions/919647924b0b_.py +++ /dev/null @@ -1,52 +0,0 @@ -"""empty message - -Revision ID: 919647924b0b -Revises: f2a507ac545d -Create Date: 2024-02-25 11:56:02.211392 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. 
-revision: str = '919647924b0b' -down_revision: Union[str, None] = 'f2a507ac545d' -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('parts', sa.Column('resistance', sa.Float(), nullable=True)) - op.add_column('parts', sa.Column('inductance', sa.Float(), nullable=True)) - op.add_column('parts', sa.Column('capacitance', sa.Float(), nullable=True)) - op.add_column('parts', sa.Column('dielectric', sa.String(), nullable=True)) - op.add_column('parts', sa.Column('current', sa.Float(), nullable=True)) - op.add_column('parts', sa.Column('voltage', sa.Float(), nullable=True)) - op.create_index(op.f('ix_parts_capacitance'), 'parts', ['capacitance'], unique=False) - op.create_index(op.f('ix_parts_current'), 'parts', ['current'], unique=False) - op.create_index(op.f('ix_parts_dielectric'), 'parts', ['dielectric'], unique=False) - op.create_index(op.f('ix_parts_inductance'), 'parts', ['inductance'], unique=False) - op.create_index(op.f('ix_parts_resistance'), 'parts', ['resistance'], unique=False) - op.create_index(op.f('ix_parts_voltage'), 'parts', ['voltage'], unique=False) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_index(op.f('ix_parts_voltage'), table_name='parts') - op.drop_index(op.f('ix_parts_resistance'), table_name='parts') - op.drop_index(op.f('ix_parts_inductance'), table_name='parts') - op.drop_index(op.f('ix_parts_dielectric'), table_name='parts') - op.drop_index(op.f('ix_parts_current'), table_name='parts') - op.drop_index(op.f('ix_parts_capacitance'), table_name='parts') - op.drop_column('parts', 'voltage') - op.drop_column('parts', 'current') - op.drop_column('parts', 'dielectric') - op.drop_column('parts', 'capacitance') - op.drop_column('parts', 'inductance') - op.drop_column('parts', 'resistance') - # ### end Alembic commands ### diff --git a/alembic/versions/c041226a1f05_initial_commit.py b/alembic/versions/c041226a1f05_initial_commit.py new file mode 100644 index 0000000..ff38d8c --- /dev/null +++ b/alembic/versions/c041226a1f05_initial_commit.py @@ -0,0 +1,69 @@ +"""Initial commit + +Revision ID: c041226a1f05 +Revises: +Create Date: 2024-07-10 10:16:28.718282 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = 'c041226a1f05' +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table('capacitors', + sa.Column('capacitance_farads_min', sa.Float(), nullable=True), + sa.Column('capacitance_farads_max', sa.Float(), nullable=True), + sa.Column('rated_voltage_volts', sa.Float(), nullable=True), + sa.Column('operating_temp_celsius_min', sa.Float(), nullable=True), + sa.Column('operating_temp_celsius_max', sa.Float(), nullable=True), + sa.Column('dielectric_code', sa.String(), nullable=True), + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('last_update', sa.DateTime(), nullable=True), + sa.Column('price', sa.Float(), nullable=True), + sa.Column('stock', sa.Integer(), nullable=True), + sa.Column('overhead_cost', sa.Float(), nullable=True), + sa.Column('rating', sa.Integer(), nullable=True), + sa.Column('lcsc_id', sa.String(), nullable=True), + sa.Column('mpn', sa.String(), nullable=True), + sa.Column('package', sa.String(), nullable=True), + sa.Column('footprint_name', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('lcsc_id') + ) + op.create_table('resistors', + sa.Column('resistance_ohms_min', sa.Float(), nullable=True), + sa.Column('resistance_ohms_max', sa.Float(), nullable=True), + sa.Column('rated_power_watts', sa.Float(), nullable=True), + sa.Column('operating_temp_celsius_min', sa.Float(), nullable=True), + sa.Column('operating_temp_celsius_max', sa.Float(), nullable=True), + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('last_update', sa.DateTime(), nullable=True), + sa.Column('price', sa.Float(), nullable=True), + sa.Column('stock', sa.Integer(), nullable=True), + sa.Column('overhead_cost', sa.Float(), nullable=True), + sa.Column('rating', sa.Integer(), nullable=True), + sa.Column('lcsc_id', sa.String(), nullable=True), + sa.Column('mpn', sa.String(), nullable=True), + sa.Column('package', sa.String(), nullable=True), + sa.Column('footprint_name', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('lcsc_id') + ) + # ### end 
Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('resistors') + op.drop_table('capacitors') + # ### end Alembic commands ### diff --git a/alembic/versions/f2a507ac545d_.py b/alembic/versions/f2a507ac545d_.py deleted file mode 100644 index 8ad4e2b..0000000 --- a/alembic/versions/f2a507ac545d_.py +++ /dev/null @@ -1,30 +0,0 @@ -"""empty message - -Revision ID: f2a507ac545d -Revises: 1b3360ddd371 -Create Date: 2024-02-24 16:17:52.988335 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision: str = 'f2a507ac545d' -down_revision: Union[str, None] = '1b3360ddd371' -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.create_index(op.f('ix_parts_lcsc'), 'parts', ['lcsc'], unique=True) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_index(op.f('ix_parts_lcsc'), table_name='parts') - # ### end Alembic commands ### diff --git a/jlcpcb_scraper/Untitled-1.py b/jlcpcb_scraper/Untitled-1.py deleted file mode 100644 index d340c20..0000000 --- a/jlcpcb_scraper/Untitled-1.py +++ /dev/null @@ -1,73 +0,0 @@ -# %% -try: - import IPython -except ImportError: - print("IPython not found") -else: - ipython = IPython.get_ipython() - ipython.magic("load_ext autoreload") - ipython.magic("autoreload 2") - print("Enabled autoreload") - - -import logging -from jlcpcb_scraper.scraper import JlcpcbScraper - -# %% -logging.basicConfig(level=logging.DEBUG) -logging.info("Starting JLCPCB scraper") - - -JLCPCB_KEY="app_key4699520" -JLCPCB_SECRET="app_secret4699520" - -scraper = JlcpcbScraper( - key=JLCPCB_KEY, - secret=JLCPCB_SECRET -) - -# %% -from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker -from jlcpcb_scraper.models import Base, create_or_update_part - -# Create an SQLite database and a session -engine = create_engine('sqlite:///example.db') -Base.metadata.create_all(engine) - -Session = sessionmaker(bind=engine) -session = Session() - -# %% -from jlcpcb_scraper.model_factory import process -first_2000_parts = [] - -for i, part_data in enumerate(scraper.get_parts()): - if i > 2000: - break - first_2000_parts.append(part_data) - -# %% -for i, part_data in enumerate(first_2000_parts): - logging.debug(f"Processing part {i}") - if part := await process(part_data): - logging.debug(f"Part {i} accepted") - create_or_update_part(session, part) - -session.commit() - -# %% -from jlcpcb_scraper.models import Resistor, Capacitor - -for i, p in enumerate(session.query(Resistor).all()): - print(p.__dict__) - - if i > 100: - break - -for i, p in enumerate(session.query(Capacitor).all()): - print(p.__dict__) - - if i > 100: - break -# %% diff --git a/jlcpcb_scraper/deps.py b/jlcpcb_scraper/deps.py deleted file mode 100755 index 6945fe3..0000000 --- a/jlcpcb_scraper/deps.py +++ /dev/null @@ 
-1,33 +0,0 @@ -from typing import Generator - -from contextlib import contextmanager - -from sqlalchemy.orm.session import Session - -from session import SessionLocal - - -def get_db() -> Generator: - db = SessionLocal() - try: - yield db - db.commit() - except: - db.rollback() - raise - finally: - db.close() - - -@contextmanager -def session_scope(): - """Provide a transactional scope around a series of operations.""" - session = SessionLocal() - try: - yield session - session.commit() - except: - session.rollback() - raise - finally: - session.close() diff --git a/jlcpcb_scraper/do_configure.py b/jlcpcb_scraper/do_configure.py deleted file mode 100644 index aaf8aa7..0000000 --- a/jlcpcb_scraper/do_configure.py +++ /dev/null @@ -1,18 +0,0 @@ -from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker -from alembic.config import Config -from alembic import command - -from config import config - -# 1. Run Alembic automatic revisions -alembic_cfg = Config("alembic.ini") -command.revision(alembic_cfg, autogenerate=True, message="Automatic revisions") - -# 2. Run Alembic migrations on startup -command.upgrade(alembic_cfg, "head") - -# 3. 
Get all current Category models from the database with sqlalchemy -engine = create_engine(config.SQLALCHEMY_DATABASE_URI) -Session = sessionmaker(bind=engine) -session = Session() diff --git a/jlcpcb_scraper/factory.py b/jlcpcb_scraper/factory.py index 174d3c9..cdb0f56 100644 --- a/jlcpcb_scraper/factory.py +++ b/jlcpcb_scraper/factory.py @@ -115,7 +115,10 @@ class ResistorFactory(AbstractModelFactory): @classmethod def for_me(cls, data: dict) -> bool: - return data.get("firstCategory") == "Resistors" + return ( + data.get("firstCategory") == "Resistors" and + data.get("secondCategory") == "Chip Resistor - Surface Mount" + ) async def build(self, data: dict) -> Resistor | None: nominal_resistance = parsers.resistance(data.get("description")) @@ -183,7 +186,10 @@ class CapacitorFactory(AbstractModelFactory): @classmethod def for_me(cls, data: dict) -> bool: - return data.get("firstCategory") == "Capacitors" + return ( + data.get("firstCategory") == "Capacitors" and + data.get("secondCategory") == "Multilayer Ceramic Capacitors MLCC - SMD/SMT" + ) async def build(self, data: dict) -> Capacitor | None: nominal_capacitance = parsers.capacitance(data.get("description")) @@ -231,7 +237,7 @@ async def build(self, data: dict) -> Capacitor | None: async def process(data: dict) -> Part | None: for factory in AbstractModelFactory.factories: if factory.for_me(data): - log.debug( + log.info( "%s accepted data in category %s-%s", factory.__class__.__name__, data.get("firstCategory"), @@ -240,9 +246,9 @@ async def process(data: dict) -> Part | None: component = await factory().build(data) if component: - log.debug("Built %s", component) + log.info("Built %s", component) else: - log.debug("Rejected") + log.info("Rejected") # Always return after the first factory that accepts the data return component diff --git a/jlcpcb_scraper/main.py b/jlcpcb_scraper/main.py index 7dcc85f..a25bfae 100644 --- a/jlcpcb_scraper/main.py +++ b/jlcpcb_scraper/main.py @@ -6,47 +6,45 @@ print("IPython 
not found") else: if ipython := IPython.get_ipython(): - ipython.run_line_magic("load_ext autoreload") - ipython.run_line_magic("autoreload 2") + ipython.magic("load_ext autoreload") + ipython.magic("autoreload 2") print("Enabled autoreload") import asyncio import logging -from datetime import datetime, timedelta, UTC +from datetime import UTC, datetime, timedelta from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker +import alembic.config from alembic import command -from alembic.config import Config from jlcpcb_scraper.config import config +from jlcpcb_scraper.factory import process from jlcpcb_scraper.models import Part, create_or_update_part -from jlcpcb_scraper.model_factory import process from jlcpcb_scraper.scraper import JlcpcbScraper -logging.basicConfig(level=logging.DEBUG) -log = logging.getLogger(__name__) -logging.info("Starting scraper") # %% -# 1. Run Alembic automatic revisions -alembic_cfg = Config("alembic.ini") - -# 2. Run Alembic migrations on startup -command.revision(alembic_cfg, autogenerate=True, message="Automatic revisions") +# 1. Manage db schema with alembic +# Revision this manually with `alembic revision --autogenerate -m "My message"` +alembic_cfg = alembic.config.Config("alembic.ini") +# This also configures the base logger from the alembic command.upgrade(alembic_cfg, "head") +log = logging.getLogger(__name__) +log.info("Starting scraper") + # %% -# 3. Get all current Category models from the database with sqlalchemy +# 2. Create a database session, ensuring the tables structure exists engine = create_engine(config.SQLALCHEMY_DATABASE_URI) Session = sessionmaker(bind=engine) session = Session() -# 4. Initialize JLCPCB scraper with the current category models -scraper = JlcpcbScraper(config.JLCPCB_KEY, config.JLCPCB_SECRET) -# $$ -# 5. Start scraping parts and update the database with new categories and parts +# %% +# 3. 
Initialize JLCPCB scraper with the current category models +scraper = JlcpcbScraper(config.JLCPCB_KEY, config.JLCPCB_SECRET) for i, part_data in enumerate(scraper.get_parts()): log.debug("Processing part %s", i) @@ -58,8 +56,9 @@ log.info("Committing changes for %s parts to the database", i) session.commit() + # %% -# 6. Remove Parts older than 30 days from the database +# 4. Remove Parts older than 30 days from the database print("Removing old parts from the database") old_parts = session.query(Part).filter(Part.last_update < datetime.now(UTC) - timedelta(days=30)).all() for part in old_parts: @@ -67,6 +66,5 @@ session.commit() print(f"Removed { len(old_parts) } old parts from the database") -# Save changes to the database -session.commit() -session.close() \ No newline at end of file +# Clean up +session.close() diff --git a/jlcpcb_scraper/parsers.py b/jlcpcb_scraper/parsers.py index 46a9f98..25062b4 100644 --- a/jlcpcb_scraper/parsers.py +++ b/jlcpcb_scraper/parsers.py @@ -12,12 +12,13 @@ "M": 1000_000.0, } + def _parse(unit: str, description: str) -> float | None: """Parse the capacitance from a component description""" if description is None: return None - for part in description.split(' '): + for part in description.split(" "): if len(part) < 2: continue @@ -38,6 +39,7 @@ def _parse(unit: str, description: str) -> float | None: except ValueError: pass + capacitance = partial(_parse, "F") resistance = partial(_parse, "Ω") inductance = partial(_parse, "H") @@ -45,18 +47,20 @@ def _parse(unit: str, description: str) -> float | None: current = partial(_parse, "A") voltage = partial(_parse, "V") + def dielectric(description: str) -> str | None: """Parse the dielectric from a component description""" dielectric_code = re.compile(r"\b[XYZ][4-9][PRLSTUV]\b") dielectric_value = dielectric_code.search(description) return dielectric_value.group() if dielectric_value else None + def percent(description: str | None) -> float | None: """Parse the percentage from a 
component description""" if description is None: return None - for fragment in description.split(' '): + for fragment in description.split(" "): if fragment.endswith("%"): while not fragment[0].isdigit(): fragment = fragment[1:] @@ -65,6 +69,7 @@ def percent(description: str | None) -> float | None: except ValueError: pass + def price(price_description: str) -> float | None: """ string input example: "'20-180:0.004285714,200-780:0.003485714,1600-9580:0.002771429,800-1580:0.003042857,9600-19980:0.002542857,20000-:0.002414286'" diff --git a/jlcpcb_scraper/scraper.py b/jlcpcb_scraper/scraper.py index 93f3d80..7fc21b2 100644 --- a/jlcpcb_scraper/scraper.py +++ b/jlcpcb_scraper/scraper.py @@ -1,15 +1,14 @@ -import os import logging +import os import time from datetime import datetime, timedelta +from typing import Generator import requests -from requests.adapters import HTTPAdapter from fake_useragent import UserAgent -from typing import Generator - +from requests.adapters import HTTPAdapter -logger = logging.getLogger(__name__) +log = logging.getLogger(__name__) JLCPCB_KEY = os.environ.get("JLCPCB_KEY") @@ -46,7 +45,7 @@ def __init__( self._obtain_token() # Wew! 
- logger.info("JlcpcbScraper initialized") + log.info("JlcpcbScraper initialized") def _obtain_token(self) -> None: if not self.key or not self.secret: @@ -81,26 +80,27 @@ def _obtain_token(self) -> None: def get_parts(self) -> Generator[dict, None, None]: request_count = 0 while True: - logger.info("Fetching page %s", request_count) + log.info("Fetching page %s", request_count) request_count += 1 response = self.session.post( "https://jlcpcb.com/external/component/getComponentInfos", data={"lastKey": self.last_key} if self.last_key else None, + timeout=10, ) if response.status_code != 200: - logger.error("Cannot obtain parts, status code not 200: %s", response) + log.error("Cannot obtain parts, status code not 200: %s", response) return response_data: dict = response.json() if not response_data.get("code") == 200: - logger.error( + log.error( "Cannot obtain parts, internal status code not 200: %s", response_data, ) return if not response_data.get("data", {}).get("componentInfos"): - logger.info("No more parts to fetch") + log.info("No more parts to fetch") return self._parse_pagination(response_data) From 1a92e3343efdfe7a40c0c057bb8224fae4175206 Mon Sep 17 00:00:00 2001 From: Matthew Wildoer Date: Wed, 10 Jul 2024 14:29:55 -0700 Subject: [PATCH 07/11] Consolidate two projects --- Dockerfile.scraper | 10 + Dockerfile => Dockerfile.server | 2 +- README.md | 27 ++ alembic/env.py | 4 +- docker-compose.yml | 19 + requirements.txt => requirements-scraper.txt | 0 requirements-server.txt | 6 + setup.py | 8 +- {jlcpcb_scraper => src}/__init__.py | 0 {jlcpcb_scraper => src}/base.py | 2 +- {jlcpcb_scraper => src}/config.py | 0 {jlcpcb_scraper => src}/factory.py | 4 +- .../scraper.py => src/jlcpcb_scraper.py | 0 {jlcpcb_scraper => src}/models.py | 0 {jlcpcb_scraper => src}/parsers.py | 0 jlcpcb_scraper/main.py => src/scraper.py | 8 +- src/server.py | 453 ++++++++++++++++++ {jlcpcb_scraper => src}/session.py | 4 +- tests/test_parsers.py | 4 +- 19 files changed, 530 
insertions(+), 21 deletions(-) create mode 100644 Dockerfile.scraper rename Dockerfile => Dockerfile.server (73%) create mode 100644 docker-compose.yml rename requirements.txt => requirements-scraper.txt (100%) create mode 100644 requirements-server.txt rename {jlcpcb_scraper => src}/__init__.py (100%) rename {jlcpcb_scraper => src}/base.py (69%) rename {jlcpcb_scraper => src}/config.py (100%) rename {jlcpcb_scraper => src}/factory.py (98%) rename jlcpcb_scraper/scraper.py => src/jlcpcb_scraper.py (100%) rename {jlcpcb_scraper => src}/models.py (100%) rename {jlcpcb_scraper => src}/parsers.py (100%) rename jlcpcb_scraper/main.py => src/scraper.py (90%) create mode 100755 src/server.py rename {jlcpcb_scraper => src}/session.py (95%) diff --git a/Dockerfile.scraper b/Dockerfile.scraper new file mode 100644 index 0000000..623c16d --- /dev/null +++ b/Dockerfile.scraper @@ -0,0 +1,10 @@ +FROM python:3.12 + +WORKDIR /usr/src/app + +COPY requirements-scraper.txt ./ +RUN pip install --no-cache-dir -r requirements-scraper.txt + +COPY . . + +CMD [ "python", "./src/scraper.py" ] diff --git a/Dockerfile b/Dockerfile.server similarity index 73% rename from Dockerfile rename to Dockerfile.server index e300fee..de999b0 100644 --- a/Dockerfile +++ b/Dockerfile.server @@ -7,4 +7,4 @@ RUN pip install --no-cache-dir -r requirements.txt COPY . . -CMD [ "python", "./jlcpcb_scraper/main.py" ] +CMD [ "python", "./src/server.py" ] diff --git a/README.md b/README.md index f3085b2..ff04267 100644 --- a/README.md +++ b/README.md @@ -65,3 +65,30 @@ from jlcpcb_scraper.models import Base engine = create_engine(config.SQLALCHEMY_DATABASE_URI) Base.metadata.create_all(engine) ``` + +## Local development + +### Running a local PostgreSQL database + +Make sure docker is running and run `docker-compose up`. 
+ +### Updating the alembic database schema + +If you change the database schema, use the following command to update the alembic version and migration script: `alembic revision --autogenerate -m "explain what happened"` + +To apply the changes to the database, run `alembic upgrade head`. Your database should now follow the SQLAlchemy ORM schema. + +### Inspecting your database + +To inspect the contents of your database and make changes manually, use the `psql` util. `psql` can be installed with `brew` on mac with `brew install postgresql@16`. + +```bash +psql -U atopile -h localhost -p 5432 -d atopile-components +``` + +To list the tables, invoke `\dt`. To quit, invoke `exit`. To delete tables we are not using anymore, invoke `DROP TABLE public.alembic_version, public.ranged_values;`. + +### Starting the server + +1. Run `docker-compose up` to start PostgreSQL. +2. Run `fastapi dev endpoints.py` to start the FastAPI server. diff --git a/alembic/env.py b/alembic/env.py index 3fa5176..e382e5f 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -5,8 +5,8 @@ from alembic import context -from jlcpcb_scraper.config import config as app_config -from jlcpcb_scraper.base import * +from src.config import config as app_config +from src.base import * # this is the Alembic Config object, which provides # access to the values within the .ini file in use. 
diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..896afac --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,19 @@ +version: "3.9" + +services: + postgres: + container_name: container-pg + image: "postgres:16-alpine" + hostname: localhost + restart: always + environment: + POSTGRES_USER: ${POSTGRES_USER} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + POSTGRES_DB: ${POSTGRES_DB} + ports: + - "5432:5432" + volumes: + - postgres-data:/var/lib/postgresql/data + +volumes: + postgres-data: diff --git a/requirements.txt b/requirements-scraper.txt similarity index 100% rename from requirements.txt rename to requirements-scraper.txt diff --git a/requirements-server.txt b/requirements-server.txt new file mode 100644 index 0000000..b12bcc0 --- /dev/null +++ b/requirements-server.txt @@ -0,0 +1,6 @@ +sqlalchemy==2.0.27 +python-dotenv==1.0.1 +psycopg==3.1.19 +psycopg-binary==3.1.19 +fastapi==0.111.0 +pydantic==2.8.2 diff --git a/setup.py b/setup.py index a346244..1bf663c 100644 --- a/setup.py +++ b/setup.py @@ -8,14 +8,10 @@ 'requests', 'beautifulsoup4', ], - entry_points={ - 'console_scripts': [ - 'jlcpcb_scraper = jlcpcb_scraper.scraper:main', - ], - }, + entry_points={}, url='https://github.com/yourusername/jlcpcb_scraper', license='MIT', author='Your Name', author_email='your.email@example.com', description='A Python module to scrape jlcpcb.com/parts for all available parts' -) \ No newline at end of file +) diff --git a/jlcpcb_scraper/__init__.py b/src/__init__.py similarity index 100% rename from jlcpcb_scraper/__init__.py rename to src/__init__.py diff --git a/jlcpcb_scraper/base.py b/src/base.py similarity index 69% rename from jlcpcb_scraper/base.py rename to src/base.py index 4db61a0..3a9f8e0 100755 --- a/jlcpcb_scraper/base.py +++ b/src/base.py @@ -1,3 +1,3 @@ # Import all the models, so that Base has them before being # imported by Alembic -from jlcpcb_scraper.models import * +from config.models import * diff --git 
a/jlcpcb_scraper/config.py b/src/config.py similarity index 100% rename from jlcpcb_scraper/config.py rename to src/config.py diff --git a/jlcpcb_scraper/factory.py b/src/factory.py similarity index 98% rename from jlcpcb_scraper/factory.py rename to src/factory.py index cdb0f56..1f5cd16 100644 --- a/jlcpcb_scraper/factory.py +++ b/src/factory.py @@ -6,8 +6,8 @@ import logging from typing import Type, TypeVar -from jlcpcb_scraper import parsers -from jlcpcb_scraper.models import Capacitor, Part, Resistor +from src import parsers +from src.models import Capacitor, Part, Resistor T = TypeVar("T", bound="AbstractModelFactory") diff --git a/jlcpcb_scraper/scraper.py b/src/jlcpcb_scraper.py similarity index 100% rename from jlcpcb_scraper/scraper.py rename to src/jlcpcb_scraper.py diff --git a/jlcpcb_scraper/models.py b/src/models.py similarity index 100% rename from jlcpcb_scraper/models.py rename to src/models.py diff --git a/jlcpcb_scraper/parsers.py b/src/parsers.py similarity index 100% rename from jlcpcb_scraper/parsers.py rename to src/parsers.py diff --git a/jlcpcb_scraper/main.py b/src/scraper.py similarity index 90% rename from jlcpcb_scraper/main.py rename to src/scraper.py index a25bfae..cb55c93 100644 --- a/jlcpcb_scraper/main.py +++ b/src/scraper.py @@ -19,10 +19,10 @@ import alembic.config from alembic import command -from jlcpcb_scraper.config import config -from jlcpcb_scraper.factory import process -from jlcpcb_scraper.models import Part, create_or_update_part -from jlcpcb_scraper.scraper import JlcpcbScraper +from src.config import config +from src.factory import process +from src.models import Part, create_or_update_part +from src.jlcpcb_scraper import JlcpcbScraper # %% diff --git a/src/server.py b/src/server.py new file mode 100755 index 0000000..3c89dc8 --- /dev/null +++ b/src/server.py @@ -0,0 +1,453 @@ +#!python3 + +from copy import copy +from typing import Optional, Type + +import sqlalchemy.inspection +from fastapi import Depends, FastAPI, 
HTTPException +from pydantic import BaseModel, create_model +from sqlalchemy.orm import DeclarativeBase, Query, Session + +from src import models +from src.session import SessionLocal + +app = FastAPI() + + +def get_db(): + db = SessionLocal() + try: + yield db + finally: + db.close() + + +@app.get("/") +async def root(): + return ( + "This is the atopile component server! It's meant to be queried to the ato CLI" + ) + + +# Dynamically generate the pydantic models from the SQLAlchemy models +def _create_pydantic_request_model(model: Type[DeclarativeBase]) -> BaseModel: + columns = {} + for column in sqlalchemy.inspection.inspect(model).columns: + query_operator = column.info.get("query_operator") + if query_operator is None: + continue + + # For things that are strings (practically an enum), we should be able to query from a list + if query_operator is models.in_: + python_type = list[column.type.python_type] + else: + python_type = column.type.python_type + + columns[column.name] = (Optional[python_type], None) # No default + return create_model(model.__name__, **columns, __config__={"from_attributes": True}) + + +def _create_pydantic_response_model(model: Type[DeclarativeBase]) -> BaseModel: + columns = {} + for column in sqlalchemy.inspection.inspect(model).columns: + if not column.info.get("return", False): + continue + columns[column.name] = (column.type.python_type, None) # No default + + return create_model(model.__name__, **columns, __config__={"from_attributes": True}) + + +def _filter_components( + model: Type[DeclarativeBase], request: BaseModel, query: Query +) -> Query: + """Apply filtering based on the database scheme and query data.""" + # Apply filtering based on the query + for column in sqlalchemy.inspection.inspect(model).columns: + query_operator = column.info.get("query_operator") + if query_operator is None: + continue + + query_value = getattr(request, column.name) + if query_value is None: + continue + + query = 
query.where(query_operator(getattr(model, column.name), query_value)) + return query + + +def _find_component(model: Type[DeclarativeBase], request: BaseModel, db: Session): + assert hasattr(model, "rating"), f"Component {model} is missing a rating column" + + with db as session: + db_query = session.query(model) + db_query = _filter_components(model, request, db_query) + + # Execute the query + candidate = db_query.first() + if candidate is None: + raise HTTPException(status_code=500, detail="No component found") + + return candidate + + +# Diagnosis tools to figure out what's over-constraining params +class DiagnosticReport(BaseModel): + """A report of the number of results, if we removed each of the filters, one at a time.""" + + results_without_filter: dict[str, int] + + +def _do_diag( + model: Type[DeclarativeBase], request: BaseModel, db: Session +) -> DiagnosticReport: + report = {} + for name in request.model_fields: + # Skip fields for which the request is already None + # We can't de-restrict these further + if getattr(request, name) is None: + continue + + # Create a copy of the request, but with the field set to None + new_request = copy(request) + setattr(new_request, name, None) + + # Filter for components without this constraint + with db as session: + db_query = session.query(model) + db_query = _filter_components(model, new_request, db_query) + count = db_query.count() + report[name] = count + + return DiagnosticReport(results_without_filter=report) + + +##################### +# Resistor end point +##################### + +ResistorRequest = _create_pydantic_request_model(models.Resistor) +ResistorResponse = _create_pydantic_response_model(models.Resistor) + + +@app.post("/v2/find/resistor", response_model=ResistorResponse) +async def get_resistor(request: ResistorRequest, db: Session = Depends(get_db)): + """ + Get a resistor based on the resistor specs. + If a spec is omitted, it will allow any value for that spec in the search. 
+ """ + return _find_component(models.Resistor, request, db) + + +@app.post("/v2/find/diagnose/resistor", response_model=DiagnosticReport) +async def get_resistor_diagnostic( + request: ResistorRequest, db: Session = Depends(get_db) +): + """ + Get a diagnostics report on a resistor search. + """ + return _do_diag(models.Resistor, request, db) + + +# ##################### +# # Capacitor endpoint +# ##################### + + +# def capacitor_capacitance_filter(query: Query, api_data: api_schema.Capacitor): +# return query.filter( +# models.Capacitor.capacitance_min_farads >= api_data.capacitance_farads.min, +# models.Capacitor.capacitance_max_farads <= api_data.capacitance_farads.max, +# ) + + +# def capacitor_voltage_rating_filter(query: Query, api_data: api_schema.Capacitor): +# return query.filter( +# models.Capacitor.voltage_rating_min_volts +# <= api_data.voltage_rating_volts.min, +# models.Capacitor.voltage_rating_max_volts +# >= api_data.voltage_rating_volts.max, +# ) + + +# def capacitor_equivalent_series_resistance_filter( +# query: Query, api_data: api_schema.Capacitor +# ): +# if api_data.equivalent_series_resistance_ohms is None: +# return query +# return query.filter( +# models.Capacitor.equivalent_series_resistance_min_ohms +# >= api_data.equivalent_series_resistance_ohms.min, +# models.Capacitor.equivalent_series_resistance_max_ohms +# <= api_data.equivalent_series_resistance_ohms.max, +# ) + + +# def capacitor_temperature_coefficient_filter( +# query: Query, api_data: api_schema.Capacitor +# ): +# if api_data.temperature_coefficient_farads_per_celsius is None: +# return query +# return query.filter( +# models.Capacitor.temperature_coefficient_min_farads_per_celsius +# <= api_data.temperature_coefficient_farads_per_celsius.min, +# models.Capacitor.temperature_coefficient_max_farads_per_celsius +# >= api_data.temperature_coefficient_farads_per_celsius.max, +# ) + + +# capacitor_filters = [ +# capacitor_capacitance_filter, +# 
capacitor_voltage_rating_filter, +# capacitor_equivalent_series_resistance_filter, +# capacitor_temperature_coefficient_filter, +# ] + + +# @app.post("/capacitor", response_model=api_schema.CapacitorOutbound) +# async def get_capacitor( +# inbound_data: api_schema.CapacitorInbound, db: Session = Depends(get_db) +# ): +# """ +# Get a capacitor based on requirements. +# """ +# try: +# with db as session: +# query = session.query(models.Capacitor) + +# for filter in common_filters: +# query = filter(query, models.Capacitor, inbound_data) + +# for filter in capacitor_filters: +# query = filter(query, inbound_data) + +# # Sort the candidates by rating +# query = query.order_by(models.Capacitor.rating.desc()) + +# # Execute the query +# candidate = query.first() +# if candidate is None: +# raise HTTPException(status_code=404, detail="No capacitor found") +# return candidate +# except HTTPException as e: +# raise HTTPException(status_code=e.status_code, detail=e.detail) + + +# @app.post("/capacitor/diagnostic", response_model=api_schema.DiagnosticReport) +# async def get_capacitor_diagnostic( +# inbound_data: api_schema.CapacitorQuery, db: Session = Depends(get_db) +# ): +# """ +# Get a diagnostics report on a capacitor search. 
+# """ +# try: +# # TODO: --- +# with db as session: +# filter_count_results = [] +# for filter_func in common_filters: +# query = session.query(models.Capacitor) +# query = filter_func(query, models.Capacitor, inbound_data) +# count = query.count() +# filter_count_results.append( +# api_schema.FilterResults( +# filter_name=filter_func.__name__, count=count +# ) +# ) +# for filter_func in capacitor_filters: +# query = session.query(models.Capacitor) +# query = filter_func(query, inbound_data) +# count = query.count() +# filter_count_results.append( +# api_schema.FilterResults( +# filter_name=filter_func.__name__, count=count +# ) +# ) + +# # Check if results are available with other packages +# query = session.query(models.Capacitor) +# query = temperature_rating_filter(query, models.Capacitor, inbound_data) +# for filter in capacitor_filters: +# query = filter(query, inbound_data) +# count = query.count() +# can_find_results_with_other_packages = False +# if count > 0: +# can_find_results_with_other_packages = True + +# report = api_schema.DiagnosticReport( +# component_type="capacitor", +# search_parameters=inbound_data, +# individual_filter_results=filter_count_results, +# can_find_results_with_other_packages=can_find_results_with_other_packages, +# ) + +# return report +# except Exception as e: +# raise HTTPException(status_code=500, detail=str(e)) + + +# ##################### +# # Inductor endpoint +# ##################### + + +# def inductor_inductance_filter(query: Query, api_data: api_schema.Inductor): +# return query.filter( +# models.Inductor.inductance_min_henry >= api_data.inductance_henry.min, +# models.Inductor.inductance_max_henry <= api_data.inductance_henry.max, +# ) + + +# def inductor_current_rating_filter(query: Query, api_data: api_schema.Inductor): +# return query.filter( +# models.Inductor.current_rating_min_amperes +# >= api_data.current_rating_amperes.min, +# models.Inductor.current_rating_max_amperes +# <= 
api_data.current_rating_amperes.max, +# ) + + +# def inductor_saturation_current_filter(query: Query, api_data: api_schema.Inductor): +# if api_data.saturation_current_amperes is None: +# return query +# return query.filter( +# models.Inductor.saturation_current_min_amperes +# >= api_data.saturation_current_amperes.min, +# models.Inductor.saturation_current_max_amperes +# <= api_data.saturation_current_amperes.max, +# ) + + +# def inductor_rms_current_filter(query: Query, api_data: api_schema.Inductor): +# if api_data.rms_current_amperes is None: +# return query +# return query.filter( +# models.Inductor.rms_current_min_amperes >= api_data.rms_current_amperes.min, +# models.Inductor.rms_current_max_amperes <= api_data.rms_current_amperes.max, +# ) + + +# def inductor_resonant_frequency_filter(query: Query, api_data: api_schema.Inductor): +# if api_data.resonant_frequency_hertz is None: +# return query +# return query.filter( +# models.Inductor.resonant_frequency_min_hertz +# >= api_data.resonant_frequency_hertz.min, +# models.Inductor.resonant_frequency_max_hertz +# <= api_data.resonant_frequency_hertz.max, +# ) + + +# def inductor_resistance_filter(query: Query, api_data: api_schema.Inductor): +# if api_data.resistance_ohms is None: +# return query +# return query.filter( +# models.Inductor.resistance_min_ohms >= api_data.resistance_ohms.min, +# models.Inductor.resistance_max_ohms <= api_data.resistance_ohms.max, +# ) + + +# def inductor_temperature_coefficient_filter( +# query: Query, api_data: api_schema.Inductor +# ): +# if api_data.temperature_coefficient_henry_per_celsius is None: +# return query +# return query.filter( +# models.Inductor.temperature_coefficient_min_henry_per_celsius +# >= api_data.temperature_coefficient_henry_per_celsius.min, +# models.Inductor.temperature_coefficient_max_henry_per_celsius +# <= api_data.temperature_coefficient_henry_per_celsius.max, +# ) + + +# inductor_filters = [ +# inductor_inductance_filter, +# 
inductor_current_rating_filter, +# inductor_saturation_current_filter, +# inductor_rms_current_filter, +# inductor_resonant_frequency_filter, +# inductor_resistance_filter, +# inductor_temperature_coefficient_filter, +# ] + + +# @app.post("/inductor", response_model=api_schema.InductorResponse) +# async def get_inductor( +# inbound_data: api_schema.InductorQuery, db: Session = Depends(get_db) +# ): +# """ +# Get an inductor based on requirements. +# """ +# try: +# with db as session: +# query = session.query(models.Inductor) + +# for filter in common_filters: +# query = filter(query, models.Inductor, inbound_data) + +# for filter in inductor_filters: +# query = filter(query, inbound_data) + +# # Sort the candidates by rating +# query = query.order_by(models.Inductor.rating.desc()) + +# # Execute the query +# candidate = query.first() +# if candidate is None: +# raise HTTPException(status_code=404, detail="No capacitor found") +# return candidate +# except HTTPException as e: +# raise HTTPException(status_code=e.status_code, detail=e.detail) + + +# @app.post("/inductor/diagnostic", response_model=api_schema.DiagnosticReport) +# async def get_inductor_diagnostic( +# inbound_data: api_schema.InductorQuery, db: Session = Depends(get_db) +# ): +# """ +# Get a diagnostics report on an inductor search. 
+# """ +# try: +# with db as session: +# filter_count_results = [] +# for filter_func in common_filters: +# query = session.query(models.Inductor) +# query = filter_func(query, models.Inductor, inbound_data) +# count = query.count() +# filter_count_results.append( +# api_schema.FilterResults( +# filter_name=filter_func.__name__, count=count +# ) +# ) +# for filter_func in inductor_filters: +# query = session.query(models.Inductor) +# query = filter_func(query, inbound_data) +# count = query.count() +# filter_count_results.append( +# api_schema.FilterResults( +# filter_name=filter_func.__name__, count=count +# ) +# ) + +# # Check if results are available with other packages +# query = session.query(models.Inductor) +# query = temperature_rating_filter(query, models.Inductor, inbound_data) +# for filter in inductor_filters: +# query = filter(query, inbound_data) +# count = query.count() +# can_find_results_with_other_packages = False +# if count > 0: +# can_find_results_with_other_packages = True + +# report = api_schema.DiagnosticReport( +# component_type="inductor", +# search_parameters=inbound_data, +# individual_filter_results=filter_count_results, +# can_find_results_with_other_packages=can_find_results_with_other_packages, +# ) + +# return report +# except Exception as e: +# raise HTTPException(status_code=500, detail=str(e)) + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/jlcpcb_scraper/session.py b/src/session.py similarity index 95% rename from jlcpcb_scraper/session.py rename to src/session.py index a51abc8..c3b0ba1 100755 --- a/jlcpcb_scraper/session.py +++ b/src/session.py @@ -1,9 +1,7 @@ -import json - from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker -from config import config +from src.config import config if "postgresql" in config.SQLALCHEMY_DATABASE_URI: engine = create_engine( diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 93903ed..1dca5b9 100644 
--- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -1,6 +1,6 @@ def test_voltage(): - from jlcpcb_scraper.parsers import voltage + from src.parsers import voltage assert voltage("10V") == 10 assert voltage("10V 20V") == 10 assert voltage("20V 10V") == 20 @@ -8,7 +8,7 @@ def test_voltage(): def test_dielectric(): - from jlcpcb_scraper.parsers import dielectric + from src.parsers import dielectric assert dielectric("asdasd asdas X5P") == "X5P" assert dielectric("12312kjnkj123 X7R") == "X7R" assert dielectric("X7R 10%") == "X7R" From dd66d6d6e3933d75d744de0c05d25e965de0cdc6 Mon Sep 17 00:00:00 2001 From: Matthew Wildoer Date: Wed, 10 Jul 2024 14:32:32 -0700 Subject: [PATCH 08/11] Add .env.example --- .env.example | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..fbfd1c3 --- /dev/null +++ b/.env.example @@ -0,0 +1,8 @@ +SQLALCHEMY_DATABASE_URI=postgresql+psycopg://atopile:changeme@localhost/atopile-components + +POSTGRES_USER=atopile +POSTGRES_PASSWORD=my-password +POSTGRES_DB=atopile-components + +JLCPCB_KEY="app_some_key" +JLCPCB_SECRET="app_some_secret" From 9971ca2f1ce2deb1e41d471c01de425b7009725e Mon Sep 17 00:00:00 2001 From: Matthew Wildoer Date: Wed, 10 Jul 2024 15:46:16 -0700 Subject: [PATCH 09/11] Combined component server with scraper --- README.md | 7 ++ alembic.ini | 6 +- alembic/env.py | 4 +- ...add_range_for_power_and_voltage_ratings.py | 40 ++++++++++ {src => component_server}/__init__.py | 0 {src => component_server}/base.py | 2 +- {src => component_server}/config.py | 0 {src => component_server}/factory.py | 4 +- {src => component_server}/jlcpcb_scraper.py | 0 component_server/models.py | 80 +++++++++++++++++++ {src => component_server}/parsers.py | 0 {src => component_server}/scraper.py | 8 +- {src => component_server}/server.py | 29 ++++++- {src => component_server}/session.py | 18 ++--- setup.py | 4 +- src/models.py | 75 ----------------- 
tests/test_parsers.py | 4 +- 17 files changed, 178 insertions(+), 103 deletions(-) create mode 100644 alembic/versions/a2e2efe545fd_add_range_for_power_and_voltage_ratings.py rename {src => component_server}/__init__.py (100%) rename {src => component_server}/base.py (68%) rename {src => component_server}/config.py (100%) rename {src => component_server}/factory.py (98%) rename {src => component_server}/jlcpcb_scraper.py (100%) create mode 100644 component_server/models.py rename {src => component_server}/parsers.py (100%) rename {src => component_server}/scraper.py (89%) rename {src => component_server}/server.py (94%) rename {src => component_server}/session.py (52%) delete mode 100644 src/models.py diff --git a/README.md b/README.md index ff04267..fed6629 100644 --- a/README.md +++ b/README.md @@ -92,3 +92,10 @@ To list the tables, invoke `\dt`. To quit, invoke `exit`. To delete tables we ar 1. Run `docker-compose up` to start PostgreSQL. 2. Run `fastapi dev endpoints.py` to start the FastAPI server. + + +## Up-revving database + +1. Upgrade the schema +2. Create a revision `alembic revision --autogenerate -m "Why?"` +3. Run the upgrade on the `alembic upgrade head` diff --git a/alembic.ini b/alembic.ini index 8db3b4b..a336f02 100644 --- a/alembic.ini +++ b/alembic.ini @@ -82,7 +82,7 @@ version_path_separator = os # Use os.pathsep. 
Default configuration used for ne # Logging configuration [loggers] -keys = root,sqlalchemy,alembic,jlcpcb_scraper +keys = root,sqlalchemy,alembic,component_server [handlers] keys = console @@ -105,10 +105,10 @@ level = INFO handlers = qualname = alembic -[logger_jlcpcb_scraper] +[logger_component_server] level = INFO handlers = -qualname = jlcpcb_scraper +qualname = component_server [handler_console] class = StreamHandler diff --git a/alembic/env.py b/alembic/env.py index e382e5f..b3000fb 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -5,8 +5,8 @@ from alembic import context -from src.config import config as app_config -from src.base import * +from component_server.config import config as app_config +from component_server.base import * # this is the Alembic Config object, which provides # access to the values within the .ini file in use. diff --git a/alembic/versions/a2e2efe545fd_add_range_for_power_and_voltage_ratings.py b/alembic/versions/a2e2efe545fd_add_range_for_power_and_voltage_ratings.py new file mode 100644 index 0000000..8806185 --- /dev/null +++ b/alembic/versions/a2e2efe545fd_add_range_for_power_and_voltage_ratings.py @@ -0,0 +1,40 @@ +"""Add range for power and voltage ratings + +Revision ID: a2e2efe545fd +Revises: c041226a1f05 +Create Date: 2024-07-10 15:44:03.676990 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = 'a2e2efe545fd' +down_revision: Union[str, None] = 'c041226a1f05' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.add_column('capacitors', sa.Column('operating_voltage_volts_min', sa.Float(), nullable=True)) + op.add_column('capacitors', sa.Column('operating_voltage_volts_max', sa.Float(), nullable=True)) + op.drop_column('capacitors', 'rated_voltage_volts') + op.add_column('resistors', sa.Column('operating_power_watts_min', sa.Float(), nullable=True)) + op.add_column('resistors', sa.Column('operating_power_watts_max', sa.Float(), nullable=True)) + op.drop_column('resistors', 'rated_power_watts') + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('resistors', sa.Column('rated_power_watts', sa.DOUBLE_PRECISION(precision=53), autoincrement=False, nullable=True)) + op.drop_column('resistors', 'operating_power_watts_max') + op.drop_column('resistors', 'operating_power_watts_min') + op.add_column('capacitors', sa.Column('rated_voltage_volts', sa.DOUBLE_PRECISION(precision=53), autoincrement=False, nullable=True)) + op.drop_column('capacitors', 'operating_voltage_volts_max') + op.drop_column('capacitors', 'operating_voltage_volts_min') + # ### end Alembic commands ### diff --git a/src/__init__.py b/component_server/__init__.py similarity index 100% rename from src/__init__.py rename to component_server/__init__.py diff --git a/src/base.py b/component_server/base.py similarity index 68% rename from src/base.py rename to component_server/base.py index 3a9f8e0..1a5c719 100755 --- a/src/base.py +++ b/component_server/base.py @@ -1,3 +1,3 @@ # Import all the models, so that Base has them before being # imported by Alembic -from config.models import * +from component_server.models import * diff --git a/src/config.py b/component_server/config.py similarity index 100% rename from src/config.py rename to component_server/config.py diff --git a/src/factory.py b/component_server/factory.py similarity index 98% rename from src/factory.py rename to component_server/factory.py index 
1f5cd16..170aa81 100644 --- a/src/factory.py +++ b/component_server/factory.py @@ -6,8 +6,8 @@ import logging from typing import Type, TypeVar -from src import parsers -from src.models import Capacitor, Part, Resistor +from component_server import parsers +from component_server.models import Capacitor, Part, Resistor T = TypeVar("T", bound="AbstractModelFactory") diff --git a/src/jlcpcb_scraper.py b/component_server/jlcpcb_scraper.py similarity index 100% rename from src/jlcpcb_scraper.py rename to component_server/jlcpcb_scraper.py diff --git a/component_server/models.py b/component_server/models.py new file mode 100644 index 0000000..a4d5443 --- /dev/null +++ b/component_server/models.py @@ -0,0 +1,80 @@ +import datetime +import operator + +from sqlalchemy import Column, DateTime, Float, Integer, String +from sqlalchemy.dialects.postgresql import insert +from sqlalchemy.orm import Mapped, Session, declarative_base + +Base = declarative_base() + + +def in_(x: Column, y): + return x.in_(y) + + +class Part(Base): + """Abstract base of a part.""" + __abstract__ = True + + # Normal fields + id = Column(Integer, primary_key=True) + last_update = Column(DateTime, default=datetime.datetime.now(datetime.UTC)) + + # Things we need to commonly update + price : Mapped[float] = Column(Float) + stock : Mapped[int] = Column(Integer) + overhead_cost : Mapped[float] = Column(Float) # The cost of the setup, handling and shipping etc... as overhead of using this SKU + rating : Mapped[str] = Column(Integer) # Rating is a magic number representing stock, basic part status and cost. The higher the better. 
+ + lcsc_id : Mapped[str] = Column(String, info={"return": True}, unique=True) + mpn : Mapped[str] = Column(String, info={"return": True, "query_operator": in_}) + + package : Mapped[str] = Column(String, info={"return": True, "query_operator": in_}, nullable=True) + footprint_name: Mapped[str] = Column(String, info={"return": True}) + + +class Resistor(Part): + """A model for a resistor part.""" + __tablename__ = "resistors" + + resistance_ohms_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + resistance_ohms_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + + operating_power_watts_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + operating_power_watts_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + + operating_temp_celsius_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + operating_temp_celsius_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + + +class Capacitor(Part): + """A model for a capacitor part.""" + __tablename__ = "capacitors" + + capacitance_farads_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + capacitance_farads_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + + operating_voltage_volts_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + operating_voltage_volts_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + + # Dielectric type information: rated temperature range and temperature variation + # 
https://blog.knowlescapacitors.com/blog/simplify-capacitor-dielectric-selection-by-understanding-dielectric-coding-methods + operating_temp_celsius_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + operating_temp_celsius_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + # TODO: figure out how we can encode tolerance based on temperature variation + dielectric_code: Mapped[str] = Column(String) + + +def create_or_update_part(session: Session, comp: Part) -> Part: + d = {k: v for k, v in comp.__dict__.items() if k in comp.__table__.columns.keys() and k != 'id'} + stmt = insert(comp.__class__).values(**d).on_conflict_do_update( + index_elements=['lcsc_id'], + set_={ + 'price': comp.price, + 'stock': comp.stock, + 'last_update': comp.last_update + } + ) + + session.execute(stmt) + return comp diff --git a/src/parsers.py b/component_server/parsers.py similarity index 100% rename from src/parsers.py rename to component_server/parsers.py diff --git a/src/scraper.py b/component_server/scraper.py similarity index 89% rename from src/scraper.py rename to component_server/scraper.py index cb55c93..79922fa 100644 --- a/src/scraper.py +++ b/component_server/scraper.py @@ -19,10 +19,10 @@ import alembic.config from alembic import command -from src.config import config -from src.factory import process -from src.models import Part, create_or_update_part -from src.jlcpcb_scraper import JlcpcbScraper +from component_server.config import config +from component_server.factory import process +from component_server.models import Part, create_or_update_part +from component_server.jlcpcb_scraper import JlcpcbScraper # %% diff --git a/src/server.py b/component_server/server.py similarity index 94% rename from src/server.py rename to component_server/server.py index 3c89dc8..7b49f68 100755 --- a/src/server.py +++ b/component_server/server.py @@ -8,8 +8,8 @@ from pydantic import 
BaseModel, create_model from sqlalchemy.orm import DeclarativeBase, Query, Session -from src import models -from src.session import SessionLocal +from component_server import models +from component_server.session import SessionLocal app = FastAPI() @@ -52,7 +52,7 @@ def _create_pydantic_response_model(model: Type[DeclarativeBase]) -> BaseModel: for column in sqlalchemy.inspection.inspect(model).columns: if not column.info.get("return", False): continue - columns[column.name] = (column.type.python_type, None) # No default + columns[column.name] = (Optional[column.type.python_type], None) # No default return create_model(model.__name__, **columns, __config__={"from_attributes": True}) @@ -152,6 +152,29 @@ async def get_resistor_diagnostic( # # Capacitor endpoint # ##################### +CapacitorRequest = _create_pydantic_request_model(models.Capacitor) +CapacitorResponse = _create_pydantic_response_model(models.Capacitor) + + +@app.post("/v2/find/capacitor", response_model=CapacitorResponse) +async def get_capacitor(request: CapacitorRequest, db: Session = Depends(get_db)): + """ + Get a capacitor based on the capacitor specs. + If is a spec is omitted, it will allow any value for that spec in the search. + """ + return _find_component(models.Capacitor, request, db) + + +@app.post("/v2/find/diagnose/capacitor", response_model=DiagnosticReport) +async def get_capacitor_diagnostic( + request: CapacitorRequest, db: Session = Depends(get_db) +): + """ + Get a diagnostics report on a capacitor search. 
+ """ + return _do_diag(models.Capacitor, request, db) + + # def capacitor_capacitance_filter(query: Query, api_data: api_schema.Capacitor): # return query.filter( diff --git a/src/session.py b/component_server/session.py similarity index 52% rename from src/session.py rename to component_server/session.py index c3b0ba1..2acea67 100755 --- a/src/session.py +++ b/component_server/session.py @@ -1,19 +1,19 @@ from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker -from src.config import config +from component_server.config import config if "postgresql" in config.SQLALCHEMY_DATABASE_URI: engine = create_engine( config.SQLALCHEMY_DATABASE_URI, - pool_size=10, - pool_pre_ping=True, - executemany_values_page_size=50000, - executemany_batch_page_size=2000, - pool_timeout=30, # add pool_timeout parameter - max_overflow=10, # increase max_overflow parameter - pool_recycle=600, # add pool_recycle parameter - pool_use_lifo=True, + # pool_size=10, + # pool_pre_ping=True, + # executemany_values_page_size=50000, + # executemany_batch_page_size=2000, + # pool_timeout=30, # add pool_timeout parameter + # max_overflow=10, # increase max_overflow parameter + # pool_recycle=600, # add pool_recycle parameter + # pool_use_lifo=True, ) else: engine = create_engine( diff --git a/setup.py b/setup.py index 1bf663c..d4c857b 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages setup( - name='jlcpcb_scraper', + name='component_server', version='0.1', packages=find_packages(), install_requires=[ @@ -9,7 +9,7 @@ 'beautifulsoup4', ], entry_points={}, - url='https://github.com/yourusername/jlcpcb_scraper', + url='https://github.com/atopile/component-server', license='MIT', author='Your Name', author_email='your.email@example.com', diff --git a/src/models.py b/src/models.py deleted file mode 100644 index 6a5fe9d..0000000 --- a/src/models.py +++ /dev/null @@ -1,75 +0,0 @@ -import datetime -import operator - -from sqlalchemy import 
Column, DateTime, Float, Integer, String -from sqlalchemy.dialects.postgresql import insert -from sqlalchemy.orm import Mapped, Session, declarative_base - -Base = declarative_base() - - -def in_(x: Column, y): - return x.in_(y) - - -class Part(Base): - """Abstract base of a part.""" - __abstract__ = True - - # Normal fields - id = Column(Integer, primary_key=True) - last_update = Column(DateTime, default=datetime.datetime.now(datetime.UTC)) - - # Things we need to commonly update - price : Mapped[float] = Column(Float) - stock : Mapped[int] = Column(Integer) - overhead_cost : Mapped[float] = Column(Float) # The cost of the setup, handling and shipping etc... as overhead of using this SKU - rating : Mapped[str] = Column(Integer) # Rating is a magic number representing stock, basic part status and cost. The higher the better. - - lcsc_id : Mapped[str] = Column(String, info={"return": True}, unique=True) - mpn : Mapped[str] = Column(String, info={"return": True, "query_operator": in_}) - - package : Mapped[str] = Column(String, info={"return": True, "query_operator": in_}, nullable=True) - footprint_name : Mapped[str] = Column(String, info={"return": True}) - - -class Resistor(Part): - """A model for a resistor part.""" - __tablename__ = "resistors" - - resistance_ohms_min : Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) - resistance_ohms_max : Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) - rated_power_watts : Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) - operating_temp_celsius_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) - operating_temp_celsius_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) - - -class Capacitor(Part): - """A model for a capacitor part.""" - __tablename__ = "capacitors" - - 
capacitance_farads_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) - capacitance_farads_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) - rated_voltage_volts : Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) - - # Dielectric type information: rated temperature range and temperature variation - # https://blog.knowlescapacitors.com/blog/simplify-capacitor-dielectric-selection-by-understanding-dielectric-coding-methods - operating_temp_celsius_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) - operating_temp_celsius_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) - # TODO: figure out how we can encode tolerance based on temperature variation - dielectric_code : Mapped[str] = Column(String) - - -def create_or_update_part(session: Session, comp: Part) -> Part: - d = {k: v for k, v in comp.__dict__.items() if k in comp.__table__.columns.keys() and k != 'id'} - stmt = insert(comp.__class__).values(**d).on_conflict_do_update( - index_elements=['lcsc_id'], - set_={ - 'price': comp.price, - 'stock': comp.stock, - 'last_update': comp.last_update - } - ) - - session.execute(stmt) - return comp diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 1dca5b9..5ce35fd 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -1,6 +1,6 @@ def test_voltage(): - from src.parsers import voltage + from component_server.parsers import voltage assert voltage("10V") == 10 assert voltage("10V 20V") == 10 assert voltage("20V 10V") == 20 @@ -8,7 +8,7 @@ def test_voltage(): def test_dielectric(): - from src.parsers import dielectric + from component_server.parsers import dielectric assert dielectric("asdasd asdas X5P") == "X5P" assert dielectric("12312kjnkj123 X7R") == "X7R" assert dielectric("X7R 10%") == 
"X7R" From 6701ef28d50d98a20db892afb116891acf0273a7 Mon Sep 17 00:00:00 2001 From: Matthew Wildoer Date: Wed, 10 Jul 2024 17:34:53 -0700 Subject: [PATCH 10/11] Add inductor support --- alembic/versions/0ad54fb43dd5_add_inductor.py | 51 +++ component_server/factory.py | 103 +++++- component_server/models.py | 17 + component_server/scraper.py | 18 +- component_server/server.py | 311 ++---------------- 5 files changed, 202 insertions(+), 298 deletions(-) create mode 100644 alembic/versions/0ad54fb43dd5_add_inductor.py diff --git a/alembic/versions/0ad54fb43dd5_add_inductor.py b/alembic/versions/0ad54fb43dd5_add_inductor.py new file mode 100644 index 0000000..27207a1 --- /dev/null +++ b/alembic/versions/0ad54fb43dd5_add_inductor.py @@ -0,0 +1,51 @@ +"""Add inductor + +Revision ID: 0ad54fb43dd5 +Revises: a2e2efe545fd +Create Date: 2024-07-10 17:23:21.012278 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '0ad54fb43dd5' +down_revision: Union[str, None] = 'a2e2efe545fd' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table('inductor', + sa.Column('inductance_henries_min', sa.Float(), nullable=True), + sa.Column('inductance_henries_max', sa.Float(), nullable=True), + sa.Column('dc_resistance_min', sa.Float(), nullable=True), + sa.Column('dc_resistance_max', sa.Float(), nullable=True), + sa.Column('operating_current_amps_min', sa.Float(), nullable=True), + sa.Column('operating_current_amps_max', sa.Float(), nullable=True), + sa.Column('operating_temp_celsius_min', sa.Float(), nullable=True), + sa.Column('operating_temp_celsius_max', sa.Float(), nullable=True), + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('last_update', sa.DateTime(), nullable=True), + sa.Column('price', sa.Float(), nullable=True), + sa.Column('stock', sa.Integer(), nullable=True), + sa.Column('overhead_cost', sa.Float(), nullable=True), + sa.Column('rating', sa.Integer(), nullable=True), + sa.Column('lcsc_id', sa.String(), nullable=True), + sa.Column('mpn', sa.String(), nullable=True), + sa.Column('package', sa.String(), nullable=True), + sa.Column('footprint_name', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('lcsc_id') + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table('inductor') + # ### end Alembic commands ### diff --git a/component_server/factory.py b/component_server/factory.py index 170aa81..2135fbb 100644 --- a/component_server/factory.py +++ b/component_server/factory.py @@ -7,7 +7,7 @@ from typing import Type, TypeVar from component_server import parsers -from component_server.models import Capacitor, Part, Resistor +from component_server.models import Capacitor, Inductor, Part, Resistor T = TypeVar("T", bound="AbstractModelFactory") @@ -142,7 +142,8 @@ async def build(self, data: dict) -> Resistor | None: footprint_name = self._known_footprints.get(data.get("package")), resistance_ohms_min = resistance_ohms_min, resistance_ohms_max = resistance_ohms_max, - rated_power_watts = parsers.power(data.get("description")), + operating_power_watts_min = 0, + operating_power_watts_max = parsers.power(data.get("description")), operating_temp_celsius_min = None, # TODO: operating_temp_celsius_max = None, # TODO: **common @@ -204,8 +205,6 @@ async def build(self, data: dict) -> Capacitor | None: capacitance_farads_min = nominal_capacitance * (1 - tolerance_pct / 100) capacitance_farads_max = nominal_capacitance * (1 + tolerance_pct / 100) - rated_voltage_volts = parsers.voltage(data.get("description")) - dielectric_code = parsers.dielectric(data.get("description")) if dielectric_code: operating_temp_celsius_min = self.dielectric_min_temp.get(dielectric_code[0]) @@ -219,11 +218,20 @@ async def build(self, data: dict) -> Capacitor | None: log.debug("Rejected because common data couldn't be found") return + rated_voltage = parsers.voltage(data.get("description")) + if rated_voltage is None: + operating_voltage_volts_min = None + operating_voltage_volts_max = None + else: + operating_voltage_volts_min = -rated_voltage + operating_voltage_volts_max = rated_voltage + return Capacitor( footprint_name = self._known_footprints.get(data.get("package")), capacitance_farads_min=capacitance_farads_min, 
capacitance_farads_max=capacitance_farads_max, - rated_voltage_volts=rated_voltage_volts, + operating_voltage_volts_min=operating_voltage_volts_min, + operating_voltage_volts_max=operating_voltage_volts_max, operating_temp_celsius_min=operating_temp_celsius_min, operating_temp_celsius_max=operating_temp_celsius_max, dielectric_code=dielectric_code, @@ -234,6 +242,91 @@ async def build(self, data: dict) -> Capacitor | None: CapacitorFactory.register() +class InductorFactory(AbstractModelFactory): + _known_footprints = { + "0201": "L0201", + "0402": "L0402", + "0603": "L0603", + "0805": "L0805", + "1206": "L1206", + "1210": "L1210", + "1806": "L1806", + "1812": "L1812", + "2010": "L2010", + "2512": "L2512", + } + + dielectric_min_temp = { + "X": -55, + "Y": -30, + "Z": 10, + } + + dielectric_max_temp = { + "4": 65, + "5": 85, + "6": 105, + "7": 125, + "8": 150, + "9": 200, + } + + @classmethod + def for_me(cls, data: dict) -> bool: + return ( + data.get("firstCategory") in [ + "Inductors & Chokes & Transformers", + "Inductors, Coils, Chokes", + "Inductors/Coils/Transformers" + ] and + data.get("secondCategory") in ["Power Inductors", "Inductors (SMD)"] + ) + + async def build(self, data: dict) -> Capacitor | None: + nominal_inductance = parsers.inductance(data.get("description")) + if not nominal_inductance: + # Handle both zero and None + log.debug("Rejected because inductance couldn't be found") + return + tolerance_pct = parsers.percent(data.get("description")) + if tolerance_pct is None: + log.debug("Rejected because tolerance couldn't be found") + return + inductance_henries_min = nominal_inductance * (1 - tolerance_pct / 100) + inductance_henries_max = nominal_inductance * (1 + tolerance_pct / 100) + + dc_resistance_max = dc_resistance_min = parsers.resistance(data.get("description")) + + rated_current = parsers.current(data.get("description")) + if rated_current is None: + operating_current_amps_min = None + operating_current_amps_max = None + else: + 
operating_current_amps_min = -rated_current + operating_current_amps_max = rated_current + + common = self._get_common(data) + if not common: + log.debug("Rejected because common data couldn't be found") + return + + return Inductor( + footprint_name = self._known_footprints.get(data.get("package")), + inductance_henries_min=inductance_henries_min, + inductance_henries_max=inductance_henries_max, + dc_resistance_min=dc_resistance_min, + dc_resistance_max=dc_resistance_max, + operating_current_amps_min=operating_current_amps_min, + operating_current_amps_max=operating_current_amps_max, + operating_temp_celsius_min=None, + operating_temp_celsius_max=None, + **common + ) + + +InductorFactory.register() + + async def process(data: dict) -> Part | None: for factory in AbstractModelFactory.factories: if factory.for_me(data): diff --git a/component_server/models.py b/component_server/models.py index a4d5443..36ce763 100644 --- a/component_server/models.py +++ b/component_server/models.py @@ -65,6 +65,23 @@ class Capacitor(Part): dielectric_code: Mapped[str] = Column(String) +class Inductor(Part): + """A model for a inductor part.""" + __tablename__ = "inductor" + + inductance_henries_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + inductance_henries_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + + dc_resistance_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + dc_resistance_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + + operating_current_amps_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + operating_current_amps_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + + operating_temp_celsius_min: Mapped[float] = Column(Float, 
info={"return": True, "query_operator": operator.gt}, nullable=True) + operating_temp_celsius_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + + def create_or_update_part(session: Session, comp: Part) -> Part: d = {k: v for k, v in comp.__dict__.items() if k in comp.__table__.columns.keys() and k != 'id'} stmt = insert(comp.__class__).values(**d).on_conflict_do_update( diff --git a/component_server/scraper.py b/component_server/scraper.py index 79922fa..506705c 100644 --- a/component_server/scraper.py +++ b/component_server/scraper.py @@ -12,6 +12,7 @@ import asyncio import logging +import pathlib from datetime import UTC, datetime, timedelta from sqlalchemy import create_engine @@ -21,9 +22,8 @@ from alembic import command from component_server.config import config from component_server.factory import process -from component_server.models import Part, create_or_update_part from component_server.jlcpcb_scraper import JlcpcbScraper - +from component_server.models import Part, create_or_update_part # %% # 1. Manage db schema with alembic @@ -43,10 +43,22 @@ # %% -# 3. Initialize JLCPCB scraper with the current category models +# 3. Brrrr... 
+categories = set() +categories_path = pathlib.Path("categories.txt") +categories_path.unlink(missing_ok=True) +categories_path.touch() + scraper = JlcpcbScraper(config.JLCPCB_KEY, config.JLCPCB_SECRET) for i, part_data in enumerate(scraper.get_parts()): log.debug("Processing part %s", i) + # Dump the categories to a file for reference + category = (part_data["firstCategory"], part_data["secondCategory"]) + if category not in categories: + categories.add(category) + log.info("Adding category %s", category) + with categories_path.open("a") as f: + f.write(f"{category}\n") if part := asyncio.run(process(part_data)): log.debug("Part %s accepted", i) diff --git a/component_server/server.py b/component_server/server.py index 7b49f68..df08b86 100755 --- a/component_server/server.py +++ b/component_server/server.py @@ -175,301 +175,32 @@ async def get_capacitor_diagnostic( return _do_diag(models.Capacitor, request, db) - -# def capacitor_capacitance_filter(query: Query, api_data: api_schema.Capacitor): -# return query.filter( -# models.Capacitor.capacitance_min_farads >= api_data.capacitance_farads.min, -# models.Capacitor.capacitance_max_farads <= api_data.capacitance_farads.max, -# ) - - -# def capacitor_voltage_rating_filter(query: Query, api_data: api_schema.Capacitor): -# return query.filter( -# models.Capacitor.voltage_rating_min_volts -# <= api_data.voltage_rating_volts.min, -# models.Capacitor.voltage_rating_max_volts -# >= api_data.voltage_rating_volts.max, -# ) - - -# def capacitor_equivalent_series_resistance_filter( -# query: Query, api_data: api_schema.Capacitor -# ): -# if api_data.equivalent_series_resistance_ohms is None: -# return query -# return query.filter( -# models.Capacitor.equivalent_series_resistance_min_ohms -# >= api_data.equivalent_series_resistance_ohms.min, -# models.Capacitor.equivalent_series_resistance_max_ohms -# <= api_data.equivalent_series_resistance_ohms.max, -# ) - - -# def capacitor_temperature_coefficient_filter( -# query: Query, 
api_data: api_schema.Capacitor -# ): -# if api_data.temperature_coefficient_farads_per_celsius is None: -# return query -# return query.filter( -# models.Capacitor.temperature_coefficient_min_farads_per_celsius -# <= api_data.temperature_coefficient_farads_per_celsius.min, -# models.Capacitor.temperature_coefficient_max_farads_per_celsius -# >= api_data.temperature_coefficient_farads_per_celsius.max, -# ) - - -# capacitor_filters = [ -# capacitor_capacitance_filter, -# capacitor_voltage_rating_filter, -# capacitor_equivalent_series_resistance_filter, -# capacitor_temperature_coefficient_filter, -# ] - - -# @app.post("/capacitor", response_model=api_schema.CapacitorOutbound) -# async def get_capacitor( -# inbound_data: api_schema.CapacitorInbound, db: Session = Depends(get_db) -# ): -# """ -# Get a capacitor based on requirements. -# """ -# try: -# with db as session: -# query = session.query(models.Capacitor) - -# for filter in common_filters: -# query = filter(query, models.Capacitor, inbound_data) - -# for filter in capacitor_filters: -# query = filter(query, inbound_data) - -# # Sort the candidates by rating -# query = query.order_by(models.Capacitor.rating.desc()) - -# # Execute the query -# candidate = query.first() -# if candidate is None: -# raise HTTPException(status_code=404, detail="No capacitor found") -# return candidate -# except HTTPException as e: -# raise HTTPException(status_code=e.status_code, detail=e.detail) - - -# @app.post("/capacitor/diagnostic", response_model=api_schema.DiagnosticReport) -# async def get_capacitor_diagnostic( -# inbound_data: api_schema.CapacitorQuery, db: Session = Depends(get_db) -# ): -# """ -# Get a diagnostics report on a capacitor search. 
-# """ -# try: -# # TODO: --- -# with db as session: -# filter_count_results = [] -# for filter_func in common_filters: -# query = session.query(models.Capacitor) -# query = filter_func(query, models.Capacitor, inbound_data) -# count = query.count() -# filter_count_results.append( -# api_schema.FilterResults( -# filter_name=filter_func.__name__, count=count -# ) -# ) -# for filter_func in capacitor_filters: -# query = session.query(models.Capacitor) -# query = filter_func(query, inbound_data) -# count = query.count() -# filter_count_results.append( -# api_schema.FilterResults( -# filter_name=filter_func.__name__, count=count -# ) -# ) - -# # Check if results are available with other packages -# query = session.query(models.Capacitor) -# query = temperature_rating_filter(query, models.Capacitor, inbound_data) -# for filter in capacitor_filters: -# query = filter(query, inbound_data) -# count = query.count() -# can_find_results_with_other_packages = False -# if count > 0: -# can_find_results_with_other_packages = True - -# report = api_schema.DiagnosticReport( -# component_type="capacitor", -# search_parameters=inbound_data, -# individual_filter_results=filter_count_results, -# can_find_results_with_other_packages=can_find_results_with_other_packages, -# ) - -# return report -# except Exception as e: -# raise HTTPException(status_code=500, detail=str(e)) - - # ##################### # # Inductor endpoint # ##################### +InductorRequest = _create_pydantic_request_model(models.Inductor) +InductorResponse = _create_pydantic_response_model(models.Inductor) + + +@app.post("/v2/find/inductor", response_model=InductorResponse) +async def get_inductors(request: InductorRequest, db: Session = Depends(get_db)): + """ + Get a inductor based on the inductor specs. + If is a spec is omitted, it will allow any value for that spec in the search. 
+ """ + return _find_component(models.Inductor, request, db) + + +@app.post("/v2/find/diagnose/inductor", response_model=DiagnosticReport) +async def get_inductors_diagnostic( + request: InductorRequest, db: Session = Depends(get_db) +): + """ + Get a diagnostics report on a inductor search. + """ + return _do_diag(models.Inductor, request, db) -# def inductor_inductance_filter(query: Query, api_data: api_schema.Inductor): -# return query.filter( -# models.Inductor.inductance_min_henry >= api_data.inductance_henry.min, -# models.Inductor.inductance_max_henry <= api_data.inductance_henry.max, -# ) - - -# def inductor_current_rating_filter(query: Query, api_data: api_schema.Inductor): -# return query.filter( -# models.Inductor.current_rating_min_amperes -# >= api_data.current_rating_amperes.min, -# models.Inductor.current_rating_max_amperes -# <= api_data.current_rating_amperes.max, -# ) - - -# def inductor_saturation_current_filter(query: Query, api_data: api_schema.Inductor): -# if api_data.saturation_current_amperes is None: -# return query -# return query.filter( -# models.Inductor.saturation_current_min_amperes -# >= api_data.saturation_current_amperes.min, -# models.Inductor.saturation_current_max_amperes -# <= api_data.saturation_current_amperes.max, -# ) - - -# def inductor_rms_current_filter(query: Query, api_data: api_schema.Inductor): -# if api_data.rms_current_amperes is None: -# return query -# return query.filter( -# models.Inductor.rms_current_min_amperes >= api_data.rms_current_amperes.min, -# models.Inductor.rms_current_max_amperes <= api_data.rms_current_amperes.max, -# ) - - -# def inductor_resonant_frequency_filter(query: Query, api_data: api_schema.Inductor): -# if api_data.resonant_frequency_hertz is None: -# return query -# return query.filter( -# models.Inductor.resonant_frequency_min_hertz -# >= api_data.resonant_frequency_hertz.min, -# models.Inductor.resonant_frequency_max_hertz -# <= api_data.resonant_frequency_hertz.max, -# ) - - -# def 
inductor_resistance_filter(query: Query, api_data: api_schema.Inductor): -# if api_data.resistance_ohms is None: -# return query -# return query.filter( -# models.Inductor.resistance_min_ohms >= api_data.resistance_ohms.min, -# models.Inductor.resistance_max_ohms <= api_data.resistance_ohms.max, -# ) - - -# def inductor_temperature_coefficient_filter( -# query: Query, api_data: api_schema.Inductor -# ): -# if api_data.temperature_coefficient_henry_per_celsius is None: -# return query -# return query.filter( -# models.Inductor.temperature_coefficient_min_henry_per_celsius -# >= api_data.temperature_coefficient_henry_per_celsius.min, -# models.Inductor.temperature_coefficient_max_henry_per_celsius -# <= api_data.temperature_coefficient_henry_per_celsius.max, -# ) - - -# inductor_filters = [ -# inductor_inductance_filter, -# inductor_current_rating_filter, -# inductor_saturation_current_filter, -# inductor_rms_current_filter, -# inductor_resonant_frequency_filter, -# inductor_resistance_filter, -# inductor_temperature_coefficient_filter, -# ] - - -# @app.post("/inductor", response_model=api_schema.InductorResponse) -# async def get_inductor( -# inbound_data: api_schema.InductorQuery, db: Session = Depends(get_db) -# ): -# """ -# Get an inductor based on requirements. 
-# """ -# try: -# with db as session: -# query = session.query(models.Inductor) - -# for filter in common_filters: -# query = filter(query, models.Inductor, inbound_data) - -# for filter in inductor_filters: -# query = filter(query, inbound_data) - -# # Sort the candidates by rating -# query = query.order_by(models.Inductor.rating.desc()) - -# # Execute the query -# candidate = query.first() -# if candidate is None: -# raise HTTPException(status_code=404, detail="No capacitor found") -# return candidate -# except HTTPException as e: -# raise HTTPException(status_code=e.status_code, detail=e.detail) - - -# @app.post("/inductor/diagnostic", response_model=api_schema.DiagnosticReport) -# async def get_inductor_diagnostic( -# inbound_data: api_schema.InductorQuery, db: Session = Depends(get_db) -# ): -# """ -# Get a diagnostics report on an inductor search. -# """ -# try: -# with db as session: -# filter_count_results = [] -# for filter_func in common_filters: -# query = session.query(models.Inductor) -# query = filter_func(query, models.Inductor, inbound_data) -# count = query.count() -# filter_count_results.append( -# api_schema.FilterResults( -# filter_name=filter_func.__name__, count=count -# ) -# ) -# for filter_func in inductor_filters: -# query = session.query(models.Inductor) -# query = filter_func(query, inbound_data) -# count = query.count() -# filter_count_results.append( -# api_schema.FilterResults( -# filter_name=filter_func.__name__, count=count -# ) -# ) - -# # Check if results are available with other packages -# query = session.query(models.Inductor) -# query = temperature_rating_filter(query, models.Inductor, inbound_data) -# for filter in inductor_filters: -# query = filter(query, inbound_data) -# count = query.count() -# can_find_results_with_other_packages = False -# if count > 0: -# can_find_results_with_other_packages = True - -# report = api_schema.DiagnosticReport( -# component_type="inductor", -# search_parameters=inbound_data, -# 
individual_filter_results=filter_count_results, -# can_find_results_with_other_packages=can_find_results_with_other_packages, -# ) - -# return report -# except Exception as e: -# raise HTTPException(status_code=500, detail=str(e)) if __name__ == "__main__": import uvicorn From 318d7f993446a4ac0c98f6ae9805a4caf4769c33 Mon Sep 17 00:00:00 2001 From: Matthew Wildoer Date: Thu, 11 Jul 2024 14:49:05 -0700 Subject: [PATCH 11/11] Add MOSFETs to server --- alembic/versions/88955c87f972_add_mosfets.py | 55 +++++++++++++++ component_server/factory.py | 72 +++++++++++++++----- component_server/models.py | 23 +++++++ component_server/parsers.py | 21 ++++++ component_server/server.py | 31 +++++++++ 5 files changed, 185 insertions(+), 17 deletions(-) create mode 100644 alembic/versions/88955c87f972_add_mosfets.py diff --git a/alembic/versions/88955c87f972_add_mosfets.py b/alembic/versions/88955c87f972_add_mosfets.py new file mode 100644 index 0000000..e207fa2 --- /dev/null +++ b/alembic/versions/88955c87f972_add_mosfets.py @@ -0,0 +1,55 @@ +"""Add mosfets + +Revision ID: 88955c87f972 +Revises: 0ad54fb43dd5 +Create Date: 2024-07-11 14:44:09.309268 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '88955c87f972' +down_revision: Union[str, None] = '0ad54fb43dd5' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table('mosfet', + sa.Column('operating_voltage_volts_min', sa.Float(), nullable=True), + sa.Column('operating_voltage_volts_max', sa.Float(), nullable=True), + sa.Column('operating_current_amps_min', sa.Float(), nullable=True), + sa.Column('operating_current_amps_max', sa.Float(), nullable=True), + sa.Column('operating_power_watts_min', sa.Float(), nullable=True), + sa.Column('operating_power_watts_max', sa.Float(), nullable=True), + sa.Column('gate_voltage_volts_min', sa.Float(), nullable=True), + sa.Column('gate_voltage_volts_max', sa.Float(), nullable=True), + sa.Column('on_resistance_ohms_min', sa.Float(), nullable=True), + sa.Column('on_resistance_ohms_max', sa.Float(), nullable=True), + sa.Column('operating_temp_celsius_min', sa.Float(), nullable=True), + sa.Column('operating_temp_celsius_max', sa.Float(), nullable=True), + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('last_update', sa.DateTime(), nullable=True), + sa.Column('price', sa.Float(), nullable=True), + sa.Column('stock', sa.Integer(), nullable=True), + sa.Column('overhead_cost', sa.Float(), nullable=True), + sa.Column('rating', sa.Integer(), nullable=True), + sa.Column('lcsc_id', sa.String(), nullable=True), + sa.Column('mpn', sa.String(), nullable=True), + sa.Column('package', sa.String(), nullable=True), + sa.Column('footprint_name', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('lcsc_id') + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table('mosfet') + # ### end Alembic commands ### diff --git a/component_server/factory.py b/component_server/factory.py index 2135fbb..adb8a49 100644 --- a/component_server/factory.py +++ b/component_server/factory.py @@ -7,7 +7,7 @@ from typing import Type, TypeVar from component_server import parsers -from component_server.models import Capacitor, Inductor, Part, Resistor +from component_server.models import Capacitor, Inductor, Mosfet, Part, Resistor T = TypeVar("T", bound="AbstractModelFactory") @@ -256,21 +256,6 @@ class InductorFactory(AbstractModelFactory): "2512": "L2512", } - dielectric_min_temp = { - "X": -55, - "Y": -30, - "Z": 10, - } - - dielectric_max_temp = { - "4": 65, - "5": 85, - "6": 105, - "7": 125, - "8": 150, - "9": 200, - } - @classmethod def for_me(cls, data: dict) -> bool: return ( @@ -282,7 +267,7 @@ def for_me(cls, data: dict) -> bool: data.get("secondCategory") in ["Power Inductors", "Inductors (SMD)"] ) - async def build(self, data: dict) -> Capacitor | None: + async def build(self, data: dict) -> Inductor | None: nominal_inductance = parsers.inductance(data.get("description")) if not nominal_inductance: # Handle both zero and None @@ -327,6 +312,59 @@ async def build(self, data: dict) -> Capacitor | None: InductorFactory.register() +class MosfetFactory(AbstractModelFactory): + @classmethod + def for_me(cls, data: dict) -> bool: + category = (data.get("firstCategory"), data.get("secondCategory")) + return category in [ + ('Transistors/Thyristors', 'MOSFETs'), + ('Transistors', 'MOSFET'), + ('Triode/MOS Tube/Transistor', 'MOSFETs'), + ('Transistors', 'MOSFETs'), + ] + + async def build(self, data: dict) -> Mosfet | None: + operating_power_watts_min = 0 + operating_power_watts_max = parsers.power(data.get("description")) + + operating_voltage_volts_min = 0 + operating_voltage_volts_max = parsers.voltage(data.get("description")) + + operating_current_amps_min = 0 + operating_current_amps_max = 
parsers.current(data.get("description")) + + resistance, voltage, _ = parsers.mosfet_switching_specs(data.get("description")) + gate_voltage_volts_min = voltage + gate_voltage_volts_max = voltage + + on_resistance_ohms_min = resistance + on_resistance_ohms_max = resistance + + common = self._get_common(data) + if not common: + log.debug("Rejected because common data couldn't be found") + return + + return Mosfet( + operating_voltage_volts_min=operating_voltage_volts_min, + operating_voltage_volts_max=operating_voltage_volts_max, + operating_current_amps_min=operating_current_amps_min, + operating_current_amps_max=operating_current_amps_max, + operating_power_watts_min=operating_power_watts_min, + operating_power_watts_max=operating_power_watts_max, + gate_voltage_volts_min=gate_voltage_volts_min, + gate_voltage_volts_max=gate_voltage_volts_max, + on_resistance_ohms_min=on_resistance_ohms_min, + on_resistance_ohms_max=on_resistance_ohms_max, + operating_temp_celsius_min=None, + operating_temp_celsius_max=None, + **common + ) + + +MosfetFactory.register() + + async def process(data: dict) -> Part | None: for factory in AbstractModelFactory.factories: if factory.for_me(data): diff --git a/component_server/models.py b/component_server/models.py index 36ce763..f1ff4a5 100644 --- a/component_server/models.py +++ b/component_server/models.py @@ -82,6 +82,29 @@ class Inductor(Part): operating_temp_celsius_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) +class Mosfet(Part): + """A model for a mosfet part.""" + __tablename__ = "mosfet" + + operating_voltage_volts_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + operating_voltage_volts_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + + operating_current_amps_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, 
nullable=True) + operating_current_amps_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + + operating_power_watts_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + operating_power_watts_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + + gate_voltage_volts_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + gate_voltage_volts_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + + on_resistance_ohms_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + on_resistance_ohms_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + + operating_temp_celsius_min: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.gt}, nullable=True) + operating_temp_celsius_max: Mapped[float] = Column(Float, info={"return": True, "query_operator": operator.lt}, nullable=True) + + def create_or_update_part(session: Session, comp: Part) -> Part: d = {k: v for k, v in comp.__dict__.items() if k in comp.__table__.columns.keys() and k != 'id'} stmt = insert(comp.__class__).values(**d).on_conflict_do_update( diff --git a/component_server/parsers.py b/component_server/parsers.py index 25062b4..ee3f47e 100644 --- a/component_server/parsers.py +++ b/component_server/parsers.py @@ -55,6 +55,27 @@ def dielectric(description: str) -> str | None: return dielectric_value.group() if dielectric_value else None +def mosfet_switching_specs(description: str) -> tuple[float, float, float] | tuple[None, None, None]: + """ + Parse the switching specification from a component description + eg. 
2Ω@10V,450mA + """ + try: + resistance_str, testpoint_str = description.split("@") + voltage_str, current_str = testpoint_str.split(",") + except ValueError: + # In case we can't split the string properly + return None, None, None + + if resistance_float := resistance(resistance_str): + if voltage_float := voltage(voltage_str): + if current_float := current(current_str): + return resistance_float, voltage_float, current_float + + return None, None, None + # if we don't get ALL the values, return None + + def percent(description: str | None) -> float | None: """Parse the percentage from a component description""" if description is None: diff --git a/component_server/server.py b/component_server/server.py index df08b86..9c1b2a2 100755 --- a/component_server/server.py +++ b/component_server/server.py @@ -202,6 +202,37 @@ async def get_inductors_diagnostic( return _do_diag(models.Inductor, request, db) +# ##################### +# # Inductor endpoint +# ##################### + +MosfetRequest = _create_pydantic_request_model(models.Mosfet) +MosfetResponse = _create_pydantic_response_model(models.Mosfet) + + +@app.post("/v2/find/mosfet", response_model=MosfetResponse) +async def get_mosfet(request: MosfetRequest, db: Session = Depends(get_db)): + """ + Get a mosfet based on the mosfet specs. + If is a spec is omitted, it will allow any value for that spec in the search. + """ + return _find_component(models.Mosfet, request, db) + + +@app.post("/v2/find/diagnose/mosfet", response_model=DiagnosticReport) +async def get_mosfet_diagnostic( + request: MosfetRequest, db: Session = Depends(get_db) +): + """ + Get a diagnostics report on a mosfet search. + """ + return _do_diag(models.Mosfet, request, db) + + +# ##################### +# # Main +# ##################### + if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=8000)