Skip to content

Commit

Permalink
precommit
Browse files Browse the repository at this point in the history
  • Loading branch information
Sid Mohan authored and Sid Mohan committed Aug 18, 2024
1 parent 9c9d908 commit 0691864
Show file tree
Hide file tree
Showing 24 changed files with 727 additions and 24 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,7 @@ node_modules/
.DS_Store
.venv
examples/venv/
error_log.txt
error_log.txt
docs/*
!docs/*.rst
!docs/conf.py
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
</p>

<p align="center">
<b>Open-source DevSecOps for Generative AI Systems</b>. <br />
<b>Open-source PII Detection & Anonymization</b>. <br />
</p>

<p align="center">
Expand Down
73 changes: 64 additions & 9 deletions datafog/client.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# client.py
"""
Client module for DataFog.
Provides CLI commands for scanning images and text using DataFog's OCR and PII detection capabilities.
"""

import asyncio
import logging
Expand All @@ -25,7 +29,18 @@ def scan_image(
),
operations: str = typer.Option("annotate_pii", help="Operation to perform"),
):
"""Extract text from images."""
"""
Scan images for text and PII.
Extracts text from images using OCR, then detects PII entities.
Handles both remote URLs and local file paths.
Args:
image_urls: List of image URLs or file paths
operations: Pipeline operations to run (default: annotate_pii)
Prints results or exits with error on failure.
"""
if not image_urls:
typer.echo("No image URLs or file paths provided. Please provide at least one.")
raise typer.Exit(code=1)
Expand All @@ -48,7 +63,17 @@ def scan_text(
),
operations: str = typer.Option("annotate_pii", help="Operation to perform"),
):
"""Annotate texts to detect PII entities."""
"""
Scan texts for PII.
Detects PII entities in a list of input texts.
Args:
str_list: List of texts to analyze
operations: Pipeline operations to run (default: annotate_pii)
Prints results or exits with error on failure.
"""
if not str_list:
typer.echo("No texts provided.")
raise typer.Exit(code=1)
Expand All @@ -66,19 +91,34 @@ def scan_text(

@app.command()
def health():
    """
    Check DataFog service health.

    Prints a fixed message indicating that DataFog is running.
    """
    typer.echo("DataFog is running.")


@app.command()
def show_config():
    """
    Show current configuration.

    Prints the value returned by ``get_config()``.
    """
    typer.echo(get_config())


@app.command()
def download_model(model_name: str = typer.Argument(..., help="Model to download")):
    """
    Download a spaCy model.

    Args:
        model_name: Name of the model to download.

    Delegates to ``SpacyAnnotator.download_model`` and then prints a
    confirmation message.
    """
    SpacyAnnotator.download_model(model_name)
    typer.echo(f"Model {model_name} downloaded.")

Expand All @@ -87,21 +127,36 @@ def download_model(model_name: str = typer.Argument(..., help="Model to download
def show_spacy_model_directory(
    model_name: str = typer.Argument(..., help="Model to check")
):
    """
    Show the directory path for a spaCy model.

    Args:
        model_name: Name of the model to check.

    Builds a ``SpacyAnnotator`` for the model and prints the value
    returned by its ``show_model_path()`` method.
    """
    annotator = SpacyAnnotator(model_name)
    typer.echo(annotator.show_model_path())


@app.command()
def list_spacy_models():
    """
    List available spaCy models.

    Prints the value returned by ``SpacyAnnotator().list_models()``.
    """
    annotator = SpacyAnnotator()
    typer.echo(annotator.list_models())


@app.command()
def list_entities():
    """
    List available entities.

    Prints the value returned by ``SpacyAnnotator().list_entities()``.
    """
    annotator = SpacyAnnotator()
    typer.echo(annotator.list_entities())

Expand Down
28 changes: 27 additions & 1 deletion datafog/config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
"""
Configuration module for DataFog.
Defines settings and provides a global config instance.
Includes API keys, URLs, timeouts, and other options.
"""

import os
from enum import Enum
from typing import Optional
Expand All @@ -7,7 +14,17 @@


class DataFogConfig(BaseSettings):
"""Configuration settings for DataFog SDK"""
"""
Configuration settings for DataFog SDK.
This class defines all the configuration options used throughout the DataFog SDK.
It includes settings for API authentication, service URLs, timeouts, retries,
rate limiting, and logging. The configuration can be updated at runtime using
environment variables or programmatically via the update method.
All settings have default values that can be overridden as needed. The class
uses Pydantic for data validation and settings management.
"""

# API Keys and Authentication
api_key: str = os.environ.get("DATAFOG_API_KEY", "")
Expand Down Expand Up @@ -60,6 +77,15 @@ def configure(**kwargs):


class OperationType(str, Enum):
"""
Enum for supported DataFog operations.
ANNOTATE_PII: Detect and annotate PII in text
EXTRACT_TEXT: Extract text from images
REDACT_PII: Remove PII from text
ANONYMIZE_PII: Replace PII with fake data
"""

ANNOTATE_PII = "annotate_pii"
EXTRACT_TEXT = "extract_text"
REDACT_PII = "redact_pii"
Expand Down
57 changes: 52 additions & 5 deletions datafog/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,78 @@
# exceptions.py
"""
Exceptions module for DataFog SDK.
This module defines custom exceptions and utility functions for error handling in the DataFog SDK.
"""


from typing import Optional


class DataFogException(Exception):
    """
    Base exception for DataFog SDK.

    Attributes:
        message (str): The error message.
        status_code (int, optional): The HTTP status code associated with the error.
    """

    def __init__(self, message: str, status_code: Optional[int] = None):
        """
        Initialize a DataFogException.

        Args:
            message (str): The error message.
            status_code (int, optional): The HTTP status code associated with
                the error. Defaults to None.
        """
        self.message = message
        self.status_code = status_code
        # Pass the message to Exception so str(exc) yields the message text.
        super().__init__(self.message)


class BadRequestError(DataFogException):
    """
    Exception raised for 400 Bad Request errors.

    Inherits from DataFogException and sets the status code to 400.
    """

    def __init__(self, message: str):
        """
        Initialize a BadRequestError.

        Args:
            message (str): The error message.
        """
        super().__init__(message, status_code=400)


class UnprocessableEntityError(DataFogException):
    """
    Exception raised for 422 Unprocessable Entity errors.

    Inherits from DataFogException and sets the status code to 422.
    """

    def __init__(self, message: str):
        """
        Initialize an UnprocessableEntityError.

        Args:
            message (str): The error message.
        """
        super().__init__(message, status_code=422)


def raise_for_status_code(status_code: int, error_message: str):
"""Raise the appropriate exception based on the status code"""
"""
Raise the appropriate exception based on the status code.
Args:
status_code (int): The HTTP status code.
error_message (str): The error message to include in the exception.
Raises:
BadRequestError: If the status code is 400.
UnprocessableEntityError: If the status code is 422.
"""
if status_code == 400:
raise BadRequestError(error_message)
elif status_code == 422:
Expand Down
Loading

0 comments on commit 0691864

Please sign in to comment.