diff --git a/.github/workflows/dev-cicd.yml b/.github/workflows/dev-cicd.yml
index 70bd2491..77ba7789 100644
--- a/.github/workflows/dev-cicd.yml
+++ b/.github/workflows/dev-cicd.yml
@@ -27,10 +27,24 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.10"]
+        python-version: ["3.10", "3.11", "3.12"]
     steps:
       - name: Check out repo
         uses: actions/checkout@v4
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # this might remove tools that are actually needed,
+          # if set to "true" but frees about 6 GB
+          tool-cache: false
+          # all of these default to true, but feel free to set to
+          # "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          docker-images: true
+          swap-storage: true
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
@@ -54,7 +68,7 @@ jobs:
           pip install -e .
           pip install tox just pre-commit
       - name: Run Tests with tox
-        run: tox -- --cov datafog --cov-report xml --cov-report term --codeblocks
+        run: tox -- --cov datafog --cov-report xml --cov-report term -v -s --cov-report=term-missing
       - name: Submit to Codecov
         uses: codecov/codecov-action@v3
         with:
@@ -62,3 +76,7 @@ jobs:
           files: ./coverage.xml
           flags: unittests
           name: codecov-umbrella
+      - name: Clean up pip cache
+        run: |
+          pip cache purge
+          rm -rf ~/.cache/pip
diff --git a/.github/workflows/feature-cicd.yml b/.github/workflows/feature-cicd.yml
index 72b00f4f..54fd4838 100644
--- a/.github/workflows/feature-cicd.yml
+++ b/.github/workflows/feature-cicd.yml
@@ -31,6 +31,20 @@ jobs:
     steps:
       - name: Check out repo
         uses: actions/checkout@v4
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # this might remove tools that are actually needed,
+          # if set to "true" but frees about 6 GB
+          tool-cache: false
+          # all of these default to true, but feel free to set to
+          # "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          docker-images: true
+          swap-storage: true
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
@@ -51,10 +65,13 @@ jobs:
       - name: Install Dependencies
         run: |
           pip install -U pip
-          pip install -e .
-          pip install tox just pre-commit
+          pip install --no-cache-dir -e .
+          pip install --no-cache-dir tox just pre-commit
+      - name: Free up disk space
+        run: |
+          sudo apt-get clean
       - name: Run Tests with tox
-        run: tox -- --cov datafog --cov-report xml --cov-report term --codeblocks
+        run: tox -- --cov datafog --cov-report xml --cov-report term -v -s --cov-report=term-missing
       - name: Submit to Codecov
         uses: codecov/codecov-action@v3
         with:
diff --git a/.github/workflows/main-cicd.yml b/.github/workflows/main-cicd.yml
index 2153d770..e7629eeb 100644
--- a/.github/workflows/main-cicd.yml
+++ b/.github/workflows/main-cicd.yml
@@ -54,7 +54,7 @@ jobs:
           pip install -e .
           pip install tox just pre-commit
       - name: Run Tests with tox
-        run: tox -- --cov datafog --cov-report xml --cov-report term --codeblocks
+        run: tox -- --cov datafog --cov-report xml --cov-report term -v -s --cov-report=term-missing
      - name: Submit to Codecov
        uses: codecov/codecov-action@v3
        with:
diff --git a/README.md b/README.md
index db6d3b66..7a457552 100644
--- a/README.md
+++ b/README.md
@@ -116,7 +116,7 @@ For local development:
    ```
 5. Install the package in editable mode:
    ```
-   pip install -e .
+   pip install -r requirements-dev.txt
    ```
 6. Set up the project:
    ```
diff --git a/datafog/processing/image_processing/donut_processor.py b/datafog/processing/image_processing/donut_processor.py
index 6e13987c..19282d17 100644
--- a/datafog/processing/image_processing/donut_processor.py
+++ b/datafog/processing/image_processing/donut_processor.py
@@ -5,6 +5,7 @@
 import sys
 from io import BytesIO
 
+import numpy as np
 import requests
 from PIL import Image
 
@@ -13,7 +14,6 @@
 
 class DonutProcessor:
     def __init__(self, model_path="naver-clova-ix/donut-base-finetuned-cord-v2"):
-
         self.ensure_installed("torch")
         self.ensure_installed("transformers")
 
@@ -36,13 +36,31 @@ def ensure_installed(self, package_name):
             [sys.executable, "-m", "pip", "install", package_name]
         )
 
-    async def parse_image(self, image: Image) -> str:
+    def preprocess_image(self, image: Image.Image) -> np.ndarray:
+        # Convert to RGB if the image is not already in RGB mode
+        if image.mode != "RGB":
+            image = image.convert("RGB")
+
+        # Convert to numpy array
+        image_np = np.array(image)
+
+        # Ensure the image is 3D (height, width, channels)
+        if image_np.ndim == 2:
+            image_np = np.expand_dims(image_np, axis=-1)
+            image_np = np.repeat(image_np, 3, axis=-1)
+
+        return image_np
+
+    async def parse_image(self, image: Image.Image) -> str:
         """Process w/ DonutProcessor and VisionEncoderDecoderModel"""
+        # Preprocess the image
+        image_np = self.preprocess_image(image)
+
         task_prompt = "<s_cord-v2>"
         decoder_input_ids = self.processor.tokenizer(
             task_prompt, add_special_tokens=False, return_tensors="pt"
         ).input_ids
-        pixel_values = self.processor(image, return_tensors="pt").pixel_values
+        pixel_values = self.processor(images=image_np, return_tensors="pt").pixel_values
 
         outputs = self.model.generate(
             pixel_values.to(self.device),
@@ -71,7 +89,7 @@ def process_url(self, url: str) -> str:
         image = self.downloader.download_image(url)
         return self.parse_image(image)
 
-    def download_image(self, url: str) -> Image:
+    def download_image(self, url: str) -> Image.Image:
         """Download an image from URL."""
         response = requests.get(url)
         image = Image.open(BytesIO(response.content))
diff --git a/datafog/processing/spark_processing/pyspark_udfs.py b/datafog/processing/spark_processing/pyspark_udfs.py
index c0ab26f5..a51f119a 100644
--- a/datafog/processing/spark_processing/pyspark_udfs.py
+++ b/datafog/processing/spark_processing/pyspark_udfs.py
@@ -7,7 +7,7 @@
 
 
 def pii_annotator(text: str, broadcasted_nlp) -> list[list[str]]:
-    """Extract features using en_spacy_pii_fast model.
+    """Extract features using en_core_web_lg model.
 
     Returns:
         list[list[str]]: Values as arrays in order defined in the PII_ANNOTATION_LABELS.
@@ -40,7 +40,7 @@ def pii_annotator(text: str, broadcasted_nlp) -> list[list[str]]:
 
 
 def broadcast_pii_annotator_udf(
-    spark_session=None, spacy_model: str = "en_spacy_pii_fast"
+    spark_session=None, spacy_model: str = "en_core_web_lg"
 ):
     """Broadcast PII annotator across Spark cluster and create UDF"""
     ensure_installed("pyspark")
diff --git a/datafog/processing/text_processing/spacy_pii_annotator.py b/datafog/processing/text_processing/spacy_pii_annotator.py
index 375d1cd3..4c8f46c5 100644
--- a/datafog/processing/text_processing/spacy_pii_annotator.py
+++ b/datafog/processing/text_processing/spacy_pii_annotator.py
@@ -3,7 +3,26 @@
 
 from pydantic import BaseModel
 
-PII_ANNOTATION_LABELS = ["DATE_TIME", "LOC", "NRP", "ORG", "PER"]
+PII_ANNOTATION_LABELS = [
+    "CARDINAL",
+    "DATE",
+    "EVENT",
+    "FAC",
+    "GPE",
+    "LANGUAGE",
+    "LAW",
+    "LOC",
+    "MONEY",
+    "NORP",
+    "ORDINAL",
+    "ORG",
+    "PERCENT",
+    "PERSON",
+    "PRODUCT",
+    "QUANTITY",
+    "TIME",
+    "WORK_OF_ART",
+]
 MAXIMAL_STRING_SIZE = 1000000
 
 
@@ -12,21 +31,29 @@
 class SpacyPIIAnnotator(BaseModel):
     @classmethod
     def create(cls) -> "SpacyPIIAnnotator":
-        try:
-            # Try loading as a spaCy model first
-            import spacy
+        import spacy
 
-            nlp = spacy.load("en_spacy_pii_fast")
+        try:
+            nlp = spacy.load("en_core_web_lg")
         except OSError:
-            # If that fails, try importing as a module
-            try:
-                import en_spacy_pii_fast
-
-                nlp = en_spacy_pii_fast.load()
-            except ImportError:
-                raise ImportError(
-                    "Failed to load en_spacy_pii_fast. Make sure it's installed correctly."
-                )
+            import subprocess
+            import sys
+
+            interpreter_location = sys.executable
+            subprocess.run(
+                [
+                    interpreter_location,
+                    "-m",
+                    "pip",
+                    "install",
+                    "--no-deps",
+                    "--no-cache-dir",
+                    "https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl",
+                ],
+                check=True,
+            )
+            nlp = spacy.load("en_core_web_lg")
+
         return cls(nlp=nlp)
 
     def annotate(self, text: str) -> Dict[str, List[str]]:
diff --git a/datafog/services/image_service.py b/datafog/services/image_service.py
index 7a0c3f93..7666129a 100644
--- a/datafog/services/image_service.py
+++ b/datafog/services/image_service.py
@@ -1,15 +1,34 @@
 import asyncio
+import io
+import ssl
 from typing import List
 
+import aiohttp
+import certifi
 from PIL import Image
 
 from datafog.processing.image_processing.donut_processor import DonutProcessor
-from datafog.processing.image_processing.image_downloader import ImageDownloader
 from datafog.processing.image_processing.pytesseract_processor import (
     PytesseractProcessor,
 )
 
 
+class ImageDownloader:
+    async def download_image(self, url: str) -> Image.Image:
+        ssl_context = ssl.create_default_context(cafile=certifi.where())
+        async with aiohttp.ClientSession(
+            connector=aiohttp.TCPConnector(ssl=ssl_context)
+        ) as session:
+            async with session.get(url) as response:
+                if response.status == 200:
+                    image_data = await response.read()
+                    return Image.open(io.BytesIO(image_data))
+                else:
+                    raise Exception(
+                        f"Failed to download image. Status code: {response.status}"
+                    )
+
+
 class ImageService:
     def __init__(self, use_donut: bool = False, use_tesseract: bool = True):
         self.downloader = ImageDownloader()
@@ -21,7 +40,11 @@ def __init__(self, use_donut: bool = False, use_tesseract: bool = True):
         )
 
     async def download_images(self, urls: List[str]) -> List[Image.Image]:
-        return await self.downloader.download_images(urls)
+        async def download_image(url: str) -> Image.Image:
+            return await self.downloader.download_image(url)
+
+        tasks = [asyncio.create_task(download_image(url)) for url in urls]
+        return await asyncio.gather(*tasks, return_exceptions=True)
 
     async def ocr_extract(
         self,
diff --git a/requirements-dev.txt b/requirements-dev.txt
index cea9df8b..30052f91 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -6,10 +6,12 @@ just
 isort
 black
 blacken-docs
+certifi
 flake8
 prettier
 tox
-pytest
+pytest==7.4.0
+pytest-asyncio==0.21.0
 pytest-cov
 mypy
 autoflake
diff --git a/requirements.txt b/requirements.txt
index 4dbd0eec..806079bd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,181 +1,14 @@
-#
-# This file is autogenerated by pip-compile with Python 3.10
-# by the following command:
-#
-#    pip-compile --output-file=requirements.txt setup.py
-#
-aiohttp==3.9.5
-    # via datafog (setup.py)
-aiosignal==1.3.1
-    # via aiohttp
-anyio==4.4.0
-    # via starlette
-async-timeout==4.0.3
-    # via aiohttp
-asyncio==3.4.3
-    # via datafog (setup.py)
-attrs==23.2.0
-    # via aiohttp
-blis==0.7.11
-    # via thinc
-catalogue==2.0.10
-    # via
-    #   spacy
-    #   srsly
-    #   thinc
-certifi==2024.7.4
-    # via requests
-charset-normalizer==3.3.2
-    # via requests
-click==8.1.7
-    # via typer
-confection==0.1.5
-    # via thinc
-cymem==2.0.8
-    # via
-    #   preshed
-    #   spacy
-    #   thinc
-en-spacy-pii-fast
-    # via datafog (setup.py)
-exceptiongroup==1.2.2
-    # via
-    #   anyio
-    #   pytest
-fastapi==0.110.3
-    # via datafog (setup.py)
-frozenlist==1.4.1
-    # via
-    #   aiohttp
-    #   aiosignal
-idna==3.7
-    # via
-    #   anyio
-    #   requests
-    #   yarl
-iniconfig==2.0.0
-    # via pytest
-jinja2==3.1.4
-    # via spacy
-langcodes==3.4.0
-    # via spacy
-language-data==1.2.0
-    # via langcodes
-marisa-trie==1.2.0
-    # via language-data
-markupsafe==2.1.5
-    # via jinja2
-multidict==6.0.5
-    # via
-    #   aiohttp
-    #   yarl
-murmurhash==1.0.10
-    # via
-    #   preshed
-    #   spacy
-    #   thinc
-numpy==1.24.1
-    # via
-    #   blis
-    #   datafog (setup.py)
-    #   pandas
-    #   spacy
-    #   thinc
-packaging==24.1
-    # via
-    #   pytesseract
-    #   pytest
-    #   spacy
-    #   thinc
-pandas==2.2.2
-    # via datafog (setup.py)
-pathlib-abc==0.1.1
-    # via pathy
-pathy==0.11.0
-    # via spacy
-pillow==10.4.0
-    # via
-    #   datafog (setup.py)
-    #   pytesseract
-pluggy==1.5.0
-    # via pytest
-preshed==3.0.9
-    # via
-    #   spacy
-    #   thinc
-protobuf==5.27.2
-    # via datafog (setup.py)
-pydantic==1.10.15
-    # via
-    #   confection
-    #   datafog (setup.py)
-    #   fastapi
-    #   spacy
-    #   thinc
-pytesseract==0.3.10
-    # via datafog (setup.py)
-pytest==8.2.2
-    # via pytest-asyncio
-pytest-asyncio==0.23.7
-    # via datafog (setup.py)
-python-dateutil==2.9.0.post0
-    # via pandas
-pytz==2024.1
-    # via pandas
-requests==2.31.0
-    # via
-    #   datafog (setup.py)
-    #   spacy
-sentencepiece==0.2.0
-    # via datafog (setup.py)
-six==1.16.0
-    # via python-dateutil
-smart-open==6.4.0
-    # via
-    #   pathy
-    #   spacy
-sniffio==1.3.1
-    # via anyio
-spacy==3.4.4
-    # via
-    #   datafog (setup.py)
-    #   en-spacy-pii-fast
-spacy-legacy==3.0.12
-    # via spacy
-spacy-loggers==1.0.5
-    # via spacy
-srsly==2.4.8
-    # via
-    #   confection
-    #   spacy
-    #   thinc
-starlette==0.37.2
-    # via fastapi
-thinc==8.1.12
-    # via spacy
-tomli==2.0.1
-    # via pytest
-tqdm==4.66.4
-    # via spacy
-typer==0.7.0
-    # via
-    #   pathy
-    #   spacy
-typing-extensions==4.12.2
-    # via
-    #   anyio
-    #   fastapi
-    #   pydantic
-tzdata==2024.1
-    # via pandas
-urllib3==2.2.2
-    # via requests
-wasabi==0.10.1
-    # via
-    #   spacy
-    #   thinc
-yarl==1.9.4
-    # via aiohttp
-
-# The following packages are considered to be unsafe in a requirements file:
-# setuptools
+pandas
+requests==2.32.3
+spacy==3.7.5
+pydantic>=2.8.2,<3.0.0
+Pillow
+sentencepiece
+protobuf
+pytesseract
+aiohttp
+pytest-asyncio
+numpy
+fastapi
+asyncio
+setuptools
diff --git a/setup.py b/setup.py
index 5d713ab1..19c9379c 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
     long_description = f.read()
 
 # Use a single source of truth for the version
-__version__ = "3.3.0"
+__version__ = "3.4.0"
 
 project_urls = {
     "Homepage": "https://datafog.ai",
@@ -26,25 +26,25 @@
     packages=find_packages(),
     install_requires=[
         "pandas",
-        "Requests==2.31.0",
-        "spacy==3.4.4",
-        "en_spacy_pii_fast",
-        "pydantic==1.10.15",
+        "requests==2.32.3",
+        "spacy==3.7.5",
+        "pydantic",
         "Pillow",
         "sentencepiece",
         "protobuf",
         "pytesseract",
         "aiohttp",
         "pytest-asyncio",
-        "numpy==1.24.1",
+        "numpy",
         "fastapi",
         "asyncio",
-        "setuptools==70.0.0",
+        "setuptools",
     ],
-    python_requires=">=3.10",
+    python_requires=">=3.10,<3.13",
     classifiers=[
-        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.12",
         "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.10",
         "License :: OSI Approved :: MIT License",
         "Operating System :: OS Independent",
         "Framework :: tox",
diff --git a/tests/test_image_service.py b/tests/test_image_service.py
index b43b25e6..69f52cf7 100644
--- a/tests/test_image_service.py
+++ b/tests/test_image_service.py
@@ -10,6 +10,8 @@
 
 # use_tesseract selects pytesseract processor for OCR
 
+import asyncio
+
 import pytest
 from PIL import Image
 
@@ -23,10 +25,13 @@
 
 @pytest.mark.asyncio
 async def test_download_images():
-    image_service1 = ImageService()
-    images = await image_service1.download_images(urls)
-    assert len(images) == 2
-    assert all(isinstance(image, Image.Image) for image in images)
+    image_service = ImageService()
+    try:
+        images = await image_service.download_images(urls)
+        assert len(images) == 2
+        assert all(isinstance(image, Image.Image) for image in images)
+    finally:
+        await asyncio.sleep(0)  # Allow pending callbacks to run
 
 
 @pytest.mark.asyncio
diff --git a/tests/test_main.py b/tests/test_main.py
index aeb16d3f..8a749a44 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -1,94 +1,166 @@
+import asyncio
 import json
+from unittest.mock import AsyncMock, patch
 
 import pytest
 
-from datafog import DataFog, TextPIIAnnotator
+from datafog.config import OperationType
+from datafog.main import DataFog
+from datafog.processing.text_processing.spacy_pii_annotator import (
+    SpacyPIIAnnotator as TextPIIAnnotator,
+)
+from datafog.services.image_service import ImageService
+from datafog.services.text_service import TextService
 
 
-def search_nested_dict(d, target):
-    """Recursively search for a target value in nested dictionaries."""
-    if isinstance(d, dict):
-        for key, value in d.items():
-            if target in value:
-                return True
-            elif isinstance(value, dict):
-                if search_nested_dict(value, target):
-                    return True
-    return False
+@pytest.fixture
+def mock_image_service():
+    with patch("datafog.main.ImageService") as mock:
+        mock.return_value.ocr_extract = AsyncMock()
+        yield mock.return_value
 
 
-def test_textpii_annotator():
-    """Test the PII annotation functionality."""
-    text = "John Doe lives at 1234 Elm St, Springfield."
-    text_annotator = TextPIIAnnotator()
-    annotated_text = text_annotator.run(text)
-    assert "Springfield" in annotated_text["LOC"], "PII not annotated correctly."
+@pytest.fixture
+def mock_text_service():
+    with patch("datafog.main.TextService") as mock:
+        mock.return_value.batch_annotate_text_async = AsyncMock()
+        yield mock.return_value
 
 
-# @pytest.mark.asyncio
-# async def test_donut_processor():
-#     """Test the PII annotation functionality for the donutprocessor."""
-#     with open("tests/image_set.json", "r") as f:
-#         image_set = json.load(f)
-#     image_url = image_set["executive_email"]
-#     ocr_annotator = OCRPIIAnnotator()
-#     annotated_text = await ocr_annotator.run([image_url])
-#     assert "Satya Nadella" in annotated_text[0].get("PER", []), "PII not annotated correctly."
+@pytest.fixture
+def text_annotator():
+    return TextPIIAnnotator.create()
 
 
-def test_datafog_text_annotation_sync():
-    """Test DataFog class for synchronous text annotation."""
-    text = ["Joe Biden is the President of the United States."]
+@pytest.fixture(scope="module")
+def datafog():
+    return DataFog()
+
+
+@pytest.fixture(scope="module")
+def image_url():
+    with open("tests/image_set.json", "r") as f:
+        return json.load(f)["executive_email"]
+
+
+def test_text_pii_annotator(text_annotator):
+    text = "Travis Kalanick lives at 1234 Elm St, Springfield."
+    annotated_text = text_annotator.annotate(text)
+
+    assert_annotation_results(annotated_text)
+    assert_file_output(annotated_text)
+
+
+def assert_annotation_results(annotated_text):
+    assert annotated_text, "No results returned from annotation"
+    assert "PERSON" in annotated_text, "No person detected"
+    assert "LOC" in annotated_text, "No location detected"
+    assert (
+        "Travis Kalanick" in annotated_text["PERSON"]
+    ), "Person not correctly identified"
+    assert "1234 Elm St" in annotated_text["FAC"], "Facility not correctly identified"
+    assert (
+        "Springfield" in annotated_text["GPE"]
+    ), "Geopolitical entity not correctly identified"
+
+
+def assert_file_output(annotated_text):
+    import os
+    import tempfile
+
+    with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp:
+        json.dump(annotated_text, temp)
+        temp.flush()
+        assert os.path.exists(temp.name), "Output file not created"
+        with open(temp.name, "r") as f:
+            file_content = json.load(f)
+        assert (
+            file_content == annotated_text
+        ), "File output doesn't match returned annotation"
+    os.unlink(temp.name)
+
+
+def test_datafog_init():
     datafog = DataFog()
-    annotated_text = datafog.run_text_pipeline_sync(text)
+    assert isinstance(datafog.image_service, ImageService)
+    assert isinstance(datafog.text_service, TextService)
+    assert datafog.spark_service is None
+    assert datafog.operations == [OperationType.ANNOTATE_PII]
+
+    custom_image_service = ImageService()
+    custom_text_service = TextService()
+    custom_operations = [OperationType.ANNOTATE_PII, OperationType.REDACT_PII]
 
-    assert annotated_text  # Ensure that some results are returned.
-    assert search_nested_dict(
-        annotated_text, "Joe Biden"
-    ), "Joe Biden not found in annotated results."
-    assert search_nested_dict(
-        annotated_text, "the United States"
-    ), "United States not found in annotated results."
+    datafog_custom = DataFog(
+        image_service=custom_image_service,
+        text_service=custom_text_service,
+        operations=custom_operations,
+    )
+
+    assert datafog_custom.image_service == custom_image_service
+    assert datafog_custom.text_service == custom_text_service
+    assert datafog_custom.operations == custom_operations
 
 
 @pytest.mark.asyncio
-async def test_datafog_text_annotation():
-    """Test DataFog class for text annotation."""
-    text = ["Joe Biden is the President of the United States."]
-    datafog = DataFog()
-    annotated_text = await datafog.run_text_pipeline(text)
+async def test_run_ocr_pipeline(mock_image_service, mock_text_service):
+    datafog = DataFog(image_service=mock_image_service, text_service=mock_text_service)
+
+    mock_image_service.ocr_extract.return_value = ["Extracted text"]
+    mock_text_service.batch_annotate_text_async.return_value = {
+        "PERSON": ["Satya Nadella"]
+    }
+
+    result = await datafog.run_ocr_pipeline(["image_url"])
 
-    assert annotated_text  # Ensure that some results are returned.
-    assert search_nested_dict(
-        annotated_text, "Joe Biden"
-    ), "Joe Biden not found in annotated results."
-    assert search_nested_dict(
-        annotated_text, "the United States"
-    ), "United States not found in annotated results."
+    mock_image_service.ocr_extract.assert_called_once_with(["image_url"])
+    mock_text_service.batch_annotate_text_async.assert_called_once_with(
+        ["Extracted text"]
+    )
+    assert result == {"PERSON": ["Satya Nadella"]}
 
 
 @pytest.mark.asyncio
-async def test_datafog_image_extraction():
-    """Test DataFog class for image text extraction."""
-    with open("tests/image_set.json", "r") as f:
-        image_set = json.load(f)
-    image_url = image_set["executive_email"]
-    datafog = DataFog()
-    extracted_text = await datafog.run_ocr_pipeline([image_url])
-    assert extracted_text, "No text extracted."
-    assert search_nested_dict(
-        extracted_text, "Satya Nadella"
-    ), "Satya Nadella not found in extracted text."
+async def test_run_text_pipeline(mock_text_service):
+    datafog = DataFog(text_service=mock_text_service)
+
+    mock_text_service.batch_annotate_text_async.return_value = {"PERSON": ["Elon Musk"]}
+
+    result = await datafog.run_text_pipeline(
+        ["Elon Musk tries one more time to save his $56 billion pay package"]
+    )
+
+    mock_text_service.batch_annotate_text_async.assert_called_once_with(
+        ["Elon Musk tries one more time to save his $56 billion pay package"]
+    )
+    assert result == {"PERSON": ["Elon Musk"]}
 
 
 @pytest.mark.asyncio
-async def test_datafog_image_annotation():
-    """Test DataFog class for image text annotation."""
-    with open("tests/image_set.json", "r") as f:
-        image_set = json.load(f)
-    image_url = image_set["executive_email"]
-    datafog = DataFog()
-    annotated_text = await datafog.run_ocr_pipeline([image_url])
-    assert search_nested_dict(
-        annotated_text, "Satya Nadella"
-    ), "Satya Nadella not found in annotated text."
+async def test_run_text_pipeline_no_annotation():
+    datafog = DataFog(operations=[])
+
+    result = await datafog.run_text_pipeline(["Sample text"])
+
+    assert result == ["Sample text"]
+
+
+def test_run_text_pipeline_sync(mock_text_service):
+    datafog = DataFog(text_service=mock_text_service)
+
+    mock_text_service.batch_annotate_text_sync.return_value = {"PERSON": ["Jeff Bezos"]}
+
+    result = datafog.run_text_pipeline_sync(["Jeff Bezos steps down as Amazon CEO"])
+
+    mock_text_service.batch_annotate_text_sync.assert_called_once_with(
+        ["Jeff Bezos steps down as Amazon CEO"]
+    )
+    assert result == {"PERSON": ["Jeff Bezos"]}
+
+
+def test_run_text_pipeline_sync_no_annotation():
+    datafog = DataFog(operations=[])
+
+    result = datafog.run_text_pipeline_sync(["Sample text"])
+
+    assert result == ["Sample text"]
diff --git a/tox.ini b/tox.ini
index f428ab4b..4846df23 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,11 +1,11 @@
 [tox]
-envlist = py310
+envlist = py310,py311,py312
 isolated_build = True
 
 [testenv]
 deps =
-    pytest
-    pytest-codeblocks
+    pytest==7.4.0
+    pytest-asyncio==0.21.0
     pytest-cov
     -r requirements-dev.txt
 extras = all
@@ -15,4 +15,24 @@ allowlist_externals =
 commands =
     pip install --no-cache-dir -r requirements-dev.txt
     tesseract --version
-    pytest {posargs} --codeblocks
\ No newline at end of file
+    pytest {posargs} -v -s --cov=datafog --cov-report=term-missing
+
+[testenv:lint]
+skip_install = true
+deps =
+    black
+    isort
+    flake8
+commands =
+    black --check .
+    isort --check-only .
+    flake8 .
+
+[testenv:typecheck]
+deps =
+    mypy
+commands =
+    mypy datafog tests
+
+[pytest]
+asyncio_mode = auto
\ No newline at end of file