diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..479fe07 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,32 @@ +name: Deploy docs + +on: + workflow_dispatch: + push: + branches: main + paths: + - 'docs/**' + - 'mkdocs.yml' + - 'Pipfile' + +jobs: + build: + runs-on: [self-hosted, linux] + steps: + + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - run: python -m pip install build + + - name: Install poetry + run: make install-poetry + + - name: Install dependencies + run: make install-env + + - name: Build and Deploy docs + run: make deploy-docs \ No newline at end of file diff --git a/Makefile b/Makefile index 3c8b386..08738a1 100644 --- a/Makefile +++ b/Makefile @@ -12,4 +12,8 @@ linter:: poetry run pylint llm_wrapper --reports=no --output-format=colorized --fail-under=8.0 tests:: - poetry run python -m pytest -s --verbose \ No newline at end of file + poetry run python -m pytest -s --verbose + + +deploy-docs:: + poetry run mkdocs gh-deploy --force \ No newline at end of file diff --git a/docs/api/input_output_dataclasses.md b/docs/api/input_output_dataclasses.md new file mode 100644 index 0000000..81cb083 --- /dev/null +++ b/docs/api/input_output_dataclasses.md @@ -0,0 +1,44 @@ +--- +layout: default +title: Input/Output dataclasses +nav_order: 0 +parent: API +--- + + +## `class llm_wrapper.domain.input_data.InputData` dataclass +```python +@dataclass +class InputData: + input_mappings: Dict[str, str] + id: str +``` +#### Fields +- `input_mappings` (`Dict[str, str]`): Contains a mapping from symbolic variables used in the prompt to the actual data + that will be injected in place of these variables. You have to provide a mapping for each symbolic variable used + in the prompt. +- `id` (`str`): Unique identifier. Requests are processed asynchronously, so the order of the responses is not the same + as the order of the input data; this field can be used to match responses to their inputs. + +## `class llm_wrapper.domain.response.ResponseData` dataclass +```python +@dataclass +class ResponseData: + response: Union[str, BaseModel] + input_data: Optional[InputData] = None + + number_of_prompt_tokens: Optional[int] = None + number_of_generated_tokens: Optional[int] = None + error: Optional[str] = None +``` +#### Fields +- `response` (`Union[str, BaseModel]`): Contains the response of the model. If the `output_data_model_class` param was provided + to the `generate()` method, it'll contain the response parsed to the provided class. If `output_data_model_class` wasn't + provided, it'll contain the raw string returned from the model. +- `input_data` (`Optional[InputData]`): If `input_data` was provided to the `generate()` method, it'll be copied to + this field. +- `number_of_prompt_tokens` (`Optional[int]`): Number of tokens used in the prompt. +- `number_of_generated_tokens` (`Optional[int]`): Number of tokens generated by the model. +- `error` (`Optional[str]`): If any error occurred that prevented the generation pipeline from completing, it'll be listed + here.
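To show how these two dataclasses fit together, here is a minimal sketch; `model` is a placeholder for any configured `llm-wrapper` model instance (see the model pages), and the prompt and texts are illustrative:

```python
from llm_wrapper.domain.input_data import InputData

# `model` is assumed to be any configured llm-wrapper model instance (see the model pages).
prompt = "Summarize the following text. Text: {text}"
input_data = [
    InputData(input_mappings={"text": "First text to summarize."}, id="0"),
    InputData(input_mappings={"text": "Second text to summarize."}, id="1"),
]

responses = model.generate(prompt=prompt, input_data=input_data)

for response_data in responses:
    # Responses come back in arbitrary order, so match them to inputs by id.
    if response_data.error is None:
        print(response_data.input_data.id, response_data.response)
        print("Tokens used:", response_data.number_of_prompt_tokens,
              response_data.number_of_generated_tokens)
```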
+ diff --git a/docs/api/models/azure_llama2_model.md b/docs/api/models/azure_llama2_model.md new file mode 100644 index 0000000..40fdf3f --- /dev/null +++ b/docs/api/models/azure_llama2_model.md @@ -0,0 +1,82 @@ +--- +layout: default +title: AzureLlama2Model +parent: Models +grand_parent: API +nav_order: 3 +--- + +## `class llm_wrapper.models.AzureLlama2Model` API +### Methods +```python +__init__( + temperature: float = 0.0, + top_p: float = 1.0, + max_output_tokens: int = 512, + model_total_max_tokens: int = 4096, + max_concurrency: int = 1000, + max_retries: int = 8 +) +``` +#### Parameters +- `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more + random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`. +- `top_p` (`float`): Default: `1.0`. +- `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens + and generated tokens is limited by the model's context length. Default: `512`. +- `model_total_max_tokens` (`int`): Context length of the model - maximum number of input plus generated tokens. + Default: `4096`. +- `max_concurrency` (`int`): Maximum number of concurrent requests. Default: `1000`. +- `max_retries` (`int`): Maximum number of retries if a request fails. Default: `8`. + +--- + +```python +generate( + prompt: str, + input_data: typing.Optional[typing.List[InputData]] = None, + output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None +) -> typing.List[ResponseData]: +``` +#### Parameters +- `prompt` (`str`): Prompt to use to query the model. +- `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to + generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided + in the `input_mappings` of `InputData`. +- `output_data_model_class` (`Optional[Type[BaseModel]]`): If provided forces the model to generate output in the + format defined by the passed class. Generated response is automatically parsed to this class. + +#### Returns +`List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data` +is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt. + +--- + +```python +AzureLlama2Model.setup_environment( + azure_api_key: str, + azure_endpoint_url: str, + azure_deployment_name: str +) +``` +#### Parameters +- `azure_api_key` (`str`): Authentication key for the endpoint. +- `azure_endpoint_url` (`str`): URL of pre-existing endpoint. +- `azure_deployment_name` (`str`): The name under which the model was deployed. 
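For reference, a minimal call sketch based on the signature above (the credential values are placeholders for your own Azure ML endpoint details):

```python
from llm_wrapper.models import AzureLlama2Model

# Placeholder credentials - substitute the values from your own Azure ML endpoint.
AzureLlama2Model.setup_environment(
    azure_api_key="<AZURE_API_KEY>",
    azure_endpoint_url="<AZURE_ENDPOINT_URL>",
    azure_deployment_name="<AZURE_DEPLOYMENT_NAME>"
)
```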
+ +--- + +### Example usage +```python +from llm_wrapper.models import AzureLlama2Model +from llm_wrapper.domain.configuration import AzureSelfDeployedConfiguration + +configuration = AzureSelfDeployedConfiguration( + api_key="", + endpoint_url="", + deployment="" +) + +llama_model = AzureLlama2Model(config=configuration) +llama_response = llama_model.generate("2+2 is?") +``` \ No newline at end of file diff --git a/docs/api/models/azure_mistral_model.md b/docs/api/models/azure_mistral_model.md new file mode 100644 index 0000000..6485c77 --- /dev/null +++ b/docs/api/models/azure_mistral_model.md @@ -0,0 +1,82 @@ +--- +layout: default +title: AzureMistralModel +parent: Models +grand_parent: API +nav_order: 5 +--- + +## `class llm_wrapper.models.AzureMistralModel` API +### Methods +```python +__init__( + temperature: float = 0.0, + top_p: float = 1.0, + max_output_tokens: int = 1024, + model_total_max_tokens: int = 8192, + max_concurrency: int = 1000, + max_retries: int = 8 +) +``` +#### Parameters +- `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more + random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`. +- `top_p` (`float`): Default: `1.0`. +- `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens + and generated tokens is limited by the model's context length. Default: `1024`. +- `model_total_max_tokens` (`int`): Context length of the model - maximum number of input plus generated tokens. + Default: `8192`. +- `max_concurrency` (`int`): Maximum number of concurrent requests. Default: `1000`. +- `max_retries` (`int`): Maximum number of retries if a request fails. Default: `8`. + +--- + +```python +generate( + prompt: str, + input_data: typing.Optional[typing.List[InputData]] = None, + output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None +) -> typing.List[ResponseData]: +``` +#### Parameters +- `prompt` (`str`): Prompt to use to query the model. +- `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to + generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided + in the `input_mappings` of `InputData`. +- `output_data_model_class` (`Optional[Type[BaseModel]]`): If provided forces the model to generate output in the + format defined by the passed class. Generated response is automatically parsed to this class. + +#### Returns +`List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data` +is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt. + +--- + +```python +AzureMistralModel.setup_environment( + azure_api_key: str, + azure_endpoint_url: str, + azure_deployment_name: str +) +``` +#### Parameters +- `azure_api_key` (`str`): Authentication key for the endpoint. +- `azure_endpoint_url` (`str`): URL of pre-existing endpoint. +- `azure_deployment_name` (`str`): The name under which the model was deployed. 
+ +--- + +### Example usage +```python +from llm_wrapper.models import AzureMistralModel +from llm_wrapper.domain.configuration import AzureSelfDeployedConfiguration + +configuration = AzureSelfDeployedConfiguration( + api_key="", + endpoint_url="", + deployment="" +) + +mistral_model = AzureMistralModel(config=configuration) +mistral_response = mistral_model.generate("2+2 is?") +``` \ No newline at end of file diff --git a/docs/api/models/azure_openai_model.md b/docs/api/models/azure_openai_model.md new file mode 100644 index 0000000..8b9238d --- /dev/null +++ b/docs/api/models/azure_openai_model.md @@ -0,0 +1,93 @@ +--- +layout: default +title: AzureOpenAIModel +parent: Models +grand_parent: API +nav_order: 1 +--- + +## `class llm_wrapper.models.AzureOpenAIModel` API +### Methods +```python +__init__( + temperature: float = 0.0, + max_output_tokens: int = 512, + request_timeout_s: int = 60, + model_total_max_tokens: int = 4096, + max_concurrency: int = 1000, + max_retries: int = 8 +) +``` +#### Parameters +- `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more + random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`. +- `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens + and generated tokens is limited by the model's context length. Default: `512`. +- `request_timeout_s` (`int`): Timeout for requests to the model. Default: `60`. +- `model_total_max_tokens` (`int`): Context length of the model - maximum number of input plus generated tokens. + Default: `4096`. +- `max_concurrency` (`int`): Maximum number of concurrent requests. Default: `1000`. +- `max_retries` (`int`): Maximum number of retries if a request fails. Default: `8`. + +--- + +```python +generate( + prompt: str, + input_data: typing.Optional[typing.List[InputData]] = None, + output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None +) -> typing.List[ResponseData]: +``` +#### Parameters +- `prompt` (`str`): Prompt to use to query the model. +- `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to + generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided + in the `input_mappings` of `InputData`. +- `output_data_model_class` (`Optional[Type[BaseModel]]`): If provided forces the model to generate output in the + format defined by the passed class. Generated response is automatically parsed to this class. + +#### Returns +`List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data` +is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt. + +--- + +```python +AzureOpenAIModel.setup_environment( + openai_api_key: str, + openai_api_base: str, + openai_api_version: str, + openai_api_deployment_name: str, + openai_api_type: str = "azure", + model_name: str = "gpt-3.5-turbo", +) +``` +Sets up the environment for the `AzureOpenAIModel` model. +#### Parameters +- `openai_api_key` (`str`): The API key for your Azure OpenAI resource. You can find this in the Azure portal under + your Azure OpenAI resource. +- `openai_api_base` (`str`): The base URL for your Azure OpenAI resource. You can find this in the Azure portal under + your Azure OpenAI resource. +- `openai_api_version` (`str`): The API version.
+- `openai_api_deployment_name` (`str`): The name under which the model was deployed. +- `openai_api_type` (`str`): Default: `"azure"`. +- `model_name` (`str`): Model name to use. Default: `"gpt-3.5-turbo"`. + +--- + +### Example usage +```python +from llm_wrapper.models import AzureOpenAIModel +from llm_wrapper.domain.configuration import AzureOpenAIConfiguration + +configuration = AzureOpenAIConfiguration( + api_key="", + base_url="", + api_version="", + deployment="", + model_name="" +) + +gpt_model = AzureOpenAIModel(config=configuration) +gpt_response = gpt_model.generate("2+2 is?") +``` diff --git a/docs/api/models/vertexai_gemini_model.md b/docs/api/models/vertexai_gemini_model.md new file mode 100644 index 0000000..03476e7 --- /dev/null +++ b/docs/api/models/vertexai_gemini_model.md @@ -0,0 +1,86 @@ +--- +layout: default +title: VertexAIGeminiModel +parent: Models +grand_parent: API +nav_order: 4 +--- + +## `class llm_wrapper.models.VertexAIGeminiModel` API +### Methods +```python +__init__( + temperature: float = 0.0, + top_k: int = 40, + top_p: float = 0.95, + max_output_tokens: int = 1024, + model_total_max_tokens: int = 8192, + max_concurrency: int = 1000, + max_retries: int = 8, + verbose: bool = True +) +``` +#### Parameters +- `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more + random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`. +- `top_k` (`int`): Changes how the model selects tokens for output. A top-k of 3 means that the next token is selected + from among the 3 most probable tokens. Default: `40`. +- `top_p` (`float`): Top-p changes how the model selects tokens for output. Tokens are selected from most probable to + least until the sum of their probabilities equals the top_p value. Default: `0.95`. +- `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens + and generated tokens is limited by the model's context length. Default: `1024`. +- `model_total_max_tokens` (`int`): Context length of the model - maximum number of input plus generated tokens. Default: `8192`. +- `max_concurrency` (`int`): Maximum number of concurrent requests. Default: `1000`. +- `max_retries` (`int`): Maximum number of retries if a request fails. Default: `8`. +- `verbose` (`bool`): Default: `True`. + +--- + +```python +generate( + prompt: str, + input_data: typing.Optional[typing.List[InputData]] = None, + output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None
) -> typing.List[ResponseData]: +``` +#### Parameters +- `prompt` (`str`): Prompt to use to query the model. +- `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to + generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided + in the `input_mappings` of `InputData`. +- `output_data_model_class` (`Optional[Type[BaseModel]]`): If provided forces the model to generate output in the + format defined by the passed class. Generated response is automatically parsed to this class. + +#### Returns +`List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data` +is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt.
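For illustration, a sketch of a batch call with a symbolic variable; the prompt and texts are made up, and `gemini_model` is assumed to be an already configured `VertexAIGeminiModel` (the configuration step is shown under Example usage below):

```python
from llm_wrapper.domain.input_data import InputData

# `gemini_model` is assumed to be a configured VertexAIGeminiModel instance.
prompt = "Classify the sentiment of the text as positive or negative. Text: {text}"
input_data = [
    InputData(input_mappings={"text": "I really enjoyed this."}, id="0"),
    InputData(input_mappings={"text": "This was a waste of money."}, id="1"),
]

responses = gemini_model.generate(prompt=prompt, input_data=input_data)
# One ResponseData per input example; match results back to inputs via input_data.id.
results = {r.input_data.id: r.response for r in responses}
```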
+ +--- + +```python +VertexAIGeminiModel.setup_environment( + gcp_project_id: str, + gcp_llm_region: str, + gcp_model_name: str = "gemini-pro" +) +``` +#### Parameters +- `gcp_project_id` (`str`): The GCP project to use when making Vertex API calls. +- `gcp_llm_region` (`str`): The region to use when making API calls. +- `gcp_model_name` (`str`): Default: `"gemini-pro"`. + +--- + +### Example usage +```python +from llm_wrapper.models import VertexAIGeminiModel +from llm_wrapper.domain.configuration import VertexAIConfiguration + +configuration = VertexAIConfiguration( + cloud_project="", + cloud_location="" +) + +vertex_model = VertexAIGeminiModel(config=configuration) +vertex_response = vertex_model.generate("2+2 is?") +``` \ No newline at end of file diff --git a/docs/api/models/vertexai_palm_model.md b/docs/api/models/vertexai_palm_model.md new file mode 100644 index 0000000..ed83283 --- /dev/null +++ b/docs/api/models/vertexai_palm_model.md @@ -0,0 +1,86 @@ +--- +layout: default +title: VertexAIPalmModel +parent: Models +grand_parent: API +nav_order: 2 +--- + +## `class llm_wrapper.models.VertexAIPalmModel` API +### Methods +```python +__init__( + temperature: float = 0.0, + top_k: int = 40, + top_p: float = 0.95, + max_output_tokens: int = 1024, + model_total_max_tokens: int = 8192, + max_concurrency: int = 1000, + max_retries: int = 8, + verbose: bool = True +) +``` +#### Parameters +- `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more + random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`. +- `top_k` (`int`): Changes how the model selects tokens for output. A top-k of 3 means that the next token is selected + from among the 3 most probable tokens. Default: `40`. +- `top_p` (`float`): Top-p changes how the model selects tokens for output. Tokens are selected from most probable to + least until the sum of their probabilities equals the top_p value. Default: `0.95`. +- `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens + and generated tokens is limited by the model's context length. Default: `1024`. +- `model_total_max_tokens` (`int`): Context length of the model - maximum number of input plus generated tokens. Default: `8192`. +- `max_concurrency` (`int`): Maximum number of concurrent requests. Default: `1000`. +- `max_retries` (`int`): Maximum number of retries if a request fails. Default: `8`. +- `verbose` (`bool`): Default: `True`. + +--- + +```python +generate( + prompt: str, + input_data: typing.Optional[typing.List[InputData]] = None, + output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None +) -> typing.List[ResponseData]: +``` +#### Parameters +- `prompt` (`str`): Prompt to use to query the model. +- `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to + generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided + in the `input_mappings` of `InputData`. +- `output_data_model_class` (`Optional[Type[BaseModel]]`): If provided forces the model to generate output in the + format defined by the passed class. Generated response is automatically parsed to this class. + +#### Returns +`List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. 
If `input_data` +is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt. + +--- + +```python +VertexAIPalmModel.setup_environment( + gcp_project_id: str, + gcp_llm_region: str, + gcp_model_name: str = "text-bison@001" +) +``` +#### Parameters +- `gcp_project_id` (`str`): The GCP project to use when making Vertex API calls. +- `gcp_llm_region` (`str`): The region to use when making API calls. +- `gcp_model_name` (`str`): Default: `"text-bison@001"`. + +--- + +### Example usage +```python +from llm_wrapper.models import VertexAIPalmModel +from llm_wrapper.domain.configuration import VertexAIConfiguration + +configuration = VertexAIConfiguration( + cloud_project="", + cloud_location="" +) + +vertex_model = VertexAIPalmModel(config=configuration) +vertex_response = vertex_model.generate("2+2 is?") +``` \ No newline at end of file diff --git a/docs/assets/images/logo.png b/docs/assets/images/logo.png new file mode 100644 index 0000000..4b199a8 Binary files /dev/null and b/docs/assets/images/logo.png differ diff --git a/docs/faq.md b/docs/faq.md new file mode 100644 index 0000000..a3e4551 --- /dev/null +++ b/docs/faq.md @@ -0,0 +1,27 @@ +--- +layout: default +title: FAQ +nav_order: 2 +--- + + +### 1. How to use the llm-wrapper in a python notebook? +When using the `llm-wrapper` library, which utilizes asynchronous programming under the hood, you must install the `nest-asyncio` library to use it in a Jupyter notebook environment. + +To ensure proper functionality, execute the following code at the beginning of your notebook: +```jupyterpython +!pip install nest-asyncio +import nest_asyncio +nest_asyncio.apply() +``` + + + +### 2. How can I estimate the cost of my queries? + +The model provides information for each record about the count of tokens in the prompt and the count of generated tokens. +In most cases, pricing for Language Models (LLMs) is determined based on the total number of tokens processed, which encompasses both prompt tokens and generated tokens. It is essential to familiarize yourself with the pricing details offered by your service provider to understand the associated costs. An example pricing for AzureOpenAI can be found [here](https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/). + + + + diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..46512c8 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,34 @@ +--- +layout: default +title: Introduction +nav_order: 0 +--- + +

+![LLM-Wrapper Logo](assets/images/logo.png) +

+ +# Introduction + +`llm-wrapper` is a versatile and powerful library designed to streamline the process of querying large language models, offering a user-friendly experience. The `llm-wrapper` module is designed to simplify interactions with the underlying model by providing the following features: + +- **Simple and User-Friendly Interface**: The module offers an intuitive and easy-to-use interface, making it straightforward to work with the model. + +- **Asynchronous Querying (Default)**: Requests to the model are processed asynchronously by default, ensuring efficient and non-blocking interactions. + +- **Automatic Retrying Mechanism**: The module includes an automatic retrying mechanism, which helps handle transient errors and ensures that queries to the model are robust. + +- **Error Handling and Management**: Errors that may occur during interactions with the model are handled and managed gracefully, providing informative error messages and potential recovery options. + +- **Simple Output Parsing**: The module simplifies the process of parsing and working with the model's output, allowing you to easily extract the information you need. + + + +### Supported Models + +Currently, the library supports: + +* OpenAI models hosted on Microsoft Azure (`gpt-3.5-turbo`, `gpt4`, `gpt4-turbo`); +* Google Cloud Platform VertexAI models (`PaLM2`, `Gemini`); +* Open-source models `Llama2` and `Mistral` self-deployed on Azure. + diff --git a/docs/installation_and_quick_start.md b/docs/installation_and_quick_start.md new file mode 100644 index 0000000..8b1b39c --- /dev/null +++ b/docs/installation_and_quick_start.md @@ -0,0 +1,122 @@ +--- +layout: default +title: Installation & Quick start +nav_order: 1 +--- + +# Installation +Install the package via pip: + +```bash +pip install llm-wrapper +``` + +# Quick Start + +To use our package, you must have access to the credentials of the endpoint with the deployed model. +Each of the supported models have a different set of credentials +that must be passed in the corresponding configuration object. Below is a brief overview of how to use each of these models. + +## Simple usage + +### Azure GPT + +```python +from llm_wrapper.models import AzureOpenAIModel +from llm_wrapper.domain.configuration import AzureOpenAIConfiguration + +configuration = AzureOpenAIConfiguration( + api_key="", + base_url="", + api_version="", + deployment="", + model_name="" +) + +gpt_model = AzureOpenAIModel(config=configuration) +gpt_response = gpt_model.generate("2+2 is?") +``` + +* ``: The API key for your Azure OpenAI resource. You can find this in the Azure portal under your + Azure OpenAI resource. +* ``: The base URL for your Azure OpenAI resource. You can find this in the Azure portal under your + Azure OpenAI resource. +* ``: The API version. +* ``: The name under which the model was deployed. +* ``: The underlying model's name. + +### Google PaLM + +```python +from llm_wrapper.models import VertexAIPalmModel +from llm_wrapper.domain.configuration import VertexAIConfiguration + +configuration = VertexAIConfiguration( + cloud_project="", + cloud_location="" +) + +palm_model = VertexAIPalmModel(config=configuration) +palm_response = palm_model.generate("2+2 is?") +``` + +* ``: The GCP project in which you have access to the PALM model. +* ``: The region where the model is deployed. 
+ +### Google Gemini + +```python +from llm_wrapper.models import VertexAIGeminiModel +from llm_wrapper.domain.configuration import VertexAIConfiguration + +configuration = VertexAIConfiguration( + cloud_project="", + cloud_location="" +) + +gemini_model = VertexAIGeminiModel(config=configuration) +gemini_response = gemini_model.generate("2+2 is?") +``` + +* ``: The GCP project in which you have access to the Gemini model. +* ``: The region where the model is deployed. + +### Azure LLaMA 2 + +```python +from llm_wrapper.models import AzureLlama2Model +from llm_wrapper.domain.configuration import AzureSelfDeployedConfiguration + +configuration = AzureSelfDeployedConfiguration( + api_key="", + endpoint_url="", + deployment="" +) + +llama_model = AzureLlama2Model(config=configuration) +llama_response = llama_model.generate("2+2 is?") +``` + +* ``: Authentication key for the endpoint. +* ``: URL of pre-existing endpoint. +* ``: The name under which the model was deployed. + +### Azure Mistral + +```python +from llm_wrapper.models.azure_mistral import AzureMistralModel +from llm_wrapper.domain.configuration import AzureSelfDeployedConfiguration + +configuration = AzureSelfDeployedConfiguration( + api_key="", + endpoint_url="", + deployment="" +) + +mistral_model = AzureMistralModel(config=configuration) +mistral_response = mistral_model.generate("2+2 is?") +``` + +* ``: Authentication key for the endpoint. +* ``: URL of pre-existing endpoint. +* ``: The name under which the model was deployed. diff --git a/docs/tutorial/batch_query.md b/docs/tutorial/batch_query.md new file mode 100644 index 0000000..01346d6 --- /dev/null +++ b/docs/tutorial/batch_query.md @@ -0,0 +1,101 @@ +--- +layout: default +title: Batch query +nav_order: 1 +parent: Tutorial +--- + +If you want to generate responses for a batch of examples, you can achieve this by preparing a prompt with symbolic +variables and providing input data that will be injected into this prompt. `llm-wrapper` will automatically make these +requests in an async mode and retry them in case of any API error. + +Let's say we want to classify reviews of coffee as positive or negative. Here's how to do it: +```python +positive_review_0 = "Very good coffee, lightly roasted, with good aroma and taste. The taste of sourness is barely noticeable (which is good because I don't like sour coffees). After grinding, the aroma spreads throughout the room. I recommend it to all those who do not like strongly roasted and pitch-black coffees. A very good solution is to close the package with string, which allows you to preserve the aroma and freshness." +positive_review_1 = "Delicious coffee!! Delicate, just the way I like it, and the smell after opening is amazing. It smells freshly roasted. Faithful to Lavazza coffee for years, I decided to look for other flavors. Based on the reviews, I blindly bought it and it was a 10-shot, it outperformed Lavazze in taste. For me the best." +negative_review = "Marketing is doing its job and I was tempted too, but this coffee is nothing above the level of coffees from the supermarket. And the method of brewing or grinding does not help here. The coffee is simply weak - both in terms of strength and taste. I do not recommend." + +prompt = "You'll be provided with a review of a coffee. Decide if the review is positive or negative. 
Review: {review}" +input_data = [ + InputData(input_mappings={"review": positive_review_0}, id="0"), + InputData(input_mappings={"review": positive_review_1}, id="1"), + InputData(input_mappings={"review": negative_review}, id="2") +] + +responses = model.generate(prompt=prompt, input_data=input_data) +``` + +As an output we'll get `List[ResponseData]` where each `ResponseData` will contain response for a single example from +`input_data`. The requests are performed in an async mode, so remember that the order of the `responses` is not the same +as the order of the `input_data`. That's why together with the response, we pass also the `ResponseData.input_data` to +the output. + +So let's see the responses: +```python +>>> {f"review_id={response.input_data.id}": response.response for response in responses} +{ + 'review_id=0': 'The review is positive.', + 'review_id=1': 'The review is positive.', + 'review_id=2': 'The review is negative.' +} + +``` + +## Multiple symbolic variables +You can also define prompt with multiple symbolic variables. The rule is that each symbolic variable from the prompt +should have mapping provided in the `input_mappings` of `InputData`. Let's say we want to provide two reviews in one +prompt and let the model decide which one of them is positive. Here's how to do it: + +```python +prompt = """You'll be provided with two reviews of a coffee. Decide which one is positive. + +First review: {first_review} +Second review: {second_review}""" +input_data = [ + InputData(input_mappings={"first_review": positive_review_0, "second_review": negative_review}, id="0"), + InputData(input_mappings={"first_review": negative_review, "second_review": positive_review_1}, id="1"), +] + +responses = model.generate(prompt=prompt, input_data=input_data) +``` + +And the results: +```python +>>> {f"example_id={response.input_data.id}": response.response for response in responses} +{ + 'example_id=0': 'The first review is positive.', + 'example_id=1': 'The second review is positive.' +} +``` + +## Control the number of concurrent requests +As it's written above, `llm-wrapper` automatically makes requests in an async mode. By default, the maximum number of +concurrent requests is set to 1000. You can control this value by setting the `max_concurrency` parameter when +initializing the model. Set it to a value that is appropriate for your model endpoint. + +## Use a common asyncio event loop +By default, each model instance has its own event loop for handling the execution of async tasks. 
If you want to use +a common loop for multiple models or to have a custom loop, it's possible to specify it in the model constructor: + +```python +import asyncio + +from llm_wrapper.models import AzureOpenAIModel +from llm_wrapper.domain.configuration import AzureOpenAIConfiguration + + +custom_event_loop = asyncio.new_event_loop() + +configuration = AzureOpenAIConfiguration( + api_key="", + base_url="", + api_version="", + deployment="", + model_name="" +) + +model = AzureOpenAIModel( + config=configuration, + event_loop=custom_event_loop +) +``` \ No newline at end of file diff --git a/docs/tutorial/deploy_llama2_on_azure.md b/docs/tutorial/deploy_llama2_on_azure.md new file mode 100644 index 0000000..ee12cb8 --- /dev/null +++ b/docs/tutorial/deploy_llama2_on_azure.md @@ -0,0 +1,26 @@ +--- +layout: default +title: Azure Open-source Models Deployment +nav_order: 4 +parent: Tutorial +--- + + +To use Open-source models like Llama or Mistral with llm-wrapper, first you have to deploy it on your own on Azure as a ML Online Endpoint. +Here's how to do it: +1. Go to [ml.azure.com](https://ml.azure.com/) and use a subscription with a workspace that has access to the + `Model catalog`. +2. On the left click `Model catalog`, then under `Introducing Llama 2` click `View models`. +3. Click the model you want to deploy. +4. Click `Deploy -> Real-time endpoint`. +5. Select `Skip Azure AI Content Safety` and click `Proceed`. +6. Select a virtual machine and click `Deploy`. You must have sufficient quota to deploy the models. +7. In the menu on the left, click `Endpoints` and select the endpoint you've just created. +8. After the deployment is complete, you'll see `Consume` tab where the endpoint URL and authentication key will be + provided. +9. Now you can start using the model by providing the API key, endpoint URL and deployment name to + `llm_wrapper.models.AzureLlama2Model` + +In case of any problems with deployment, you can review this guide on the Azure blog: +[Introducing Llama 2 on Azure](https://techcommunity.microsoft.com/t5/ai-machine-learning-blog/introducing-llama-2-on-azure/ba-p/3881233) + diff --git a/docs/tutorial/error_handling.md b/docs/tutorial/error_handling.md new file mode 100644 index 0000000..2f68ed1 --- /dev/null +++ b/docs/tutorial/error_handling.md @@ -0,0 +1,43 @@ +--- +layout: default +title: Error handling +nav_order: 3 +parent: Tutorial +--- + + +## Too long prompt +Each LLM has its own context size defined. This is the maximum number of input plus output tokens that the model is able +to consume. `llm-wrapper` before sending the request to the model automatically checks if your input data will fit into +the model's context size and if not it'll either: +- raise `ValueError` saying that your prompt is too long if the prompt alone has already more tokens than the allowed + maximum context size of the model +- log warning saying that number of prompt tokens plus generated tokens may exceed the max allowed number of tokens of + the model if the number of tokens in the prompt plus the `max_output_tokens` you set for the model is longer than the + allowed maximum context size of the model + +In the first case, the only solution is to truncate the input data, so that it'll fit into the context size of the +model. + +The second case is just a warning, because the model will be able to start the generation, but it may fail randomly +if the number of generated tokens will be long enough to exceed the model maximum context size. In this case you have +two options. 
You can either truncate the input data or lower the `max_output_tokens` so that, added together, they won't +exceed the max context size. + +In future releases, we plan to add automatic handling of long sequences. Then the package will be able to automatically +split the whole input into shorter chunks, process them separately and combine the outputs. But it's not there yet. + + +## Output parsing errors +If you use the [Forcing model response format](forcing_response_format.md) functionality, sometimes the model can +generate a response that doesn't conform to the provided output data schema. In this case, `llm-wrapper` won't +be able to parse the output to the provided output data model class. So as a response you'll get a `ResponseData` where +`ResponseData.response` will be the raw, unparsed response from the model, and `ResponseData.error` will be +`OutputParserException`. + + +## API errors +`llm-wrapper` automatically retries failed requests. But even with this feature, the model can fail to return a response +more times than the maximum number of retries (which is currently set to 8), or some other unexpected errors may occur. +In all of these cases, `ResponseData.error` will contain the exception that occurred. So a good rule of thumb is to +first check `ResponseData.error` and, only if it's empty, move on to processing the model's response. \ No newline at end of file diff --git a/docs/tutorial/forcing_response_format.md b/docs/tutorial/forcing_response_format.md new file mode 100644 index 0000000..0b50206 --- /dev/null +++ b/docs/tutorial/forcing_response_format.md @@ -0,0 +1,96 @@ +--- +layout: default +title: Forcing model response format +nav_order: 2 +parent: Tutorial +--- + +If you want to force the model to output the response in a given JSON schema, `llm-wrapper` provides an easy way to do +it. You just need to provide a data model that describes the desired output format and the package does all the rest. +As an output, you get the response already parsed to the provided data model class. + +Here's how to use this functionality step by step: +1. Define the desired output data model class. It needs to inherit from pydantic `BaseModel`. Each field should have +a type defined and a description provided in `Field()` describing what the given field means. By providing an accurate +description, you make it easier for the model to generate a proper response. + ```python + import typing + + from pydantic import BaseModel, Field + + class ReviewOutputDataModel(BaseModel): + summary: str = Field(description="Summary of a product description") + should_buy: bool = Field(description="Recommendation whether I should buy the product or not") + brand_name: str = Field(description="Brand of the coffee") + aroma: str = Field(description="Description of the coffee aroma") + cons: typing.List[str] = Field(description="List of cons of the coffee") + ``` + +2. Provide the data model class together with the prompt and input data to the `.generate()` method. `llm-wrapper` will +automatically force the model to output the data in the provided format and will parse the string returned from the +model to the provided data model class. + + ```python + review = "Marketing is doing its job and I was tempted too, but this Blue Orca coffee is nothing above the level of coffees from the supermarket. And the method of brewing or grinding does not help here. The coffee is simply weak - both in terms of strength and taste. I do not recommend." + + prompt = "Summarize review of the coffee. 
Review: {review}" + input_data = [ + InputData(input_mappings={"review": review}, id="0") + ] + + responses = model.generate( + prompt=prompt, + input_data=input_data, + output_data_model_class=ReviewOutputDataModel + ) + response = responses[0].response + ``` + +Now we can check the response: +```python +>>> type(response) +ReviewOutputDataModel + +>>> response.should_buy +False + +>>> response.brand_name +"Blue Orca" + +>>> response.aroma +"Not mentioned in the review" + +>>> response.cons +['Weak in terms of strength', 'Weak in terms of taste'] +``` + +### What to do when output formatting doesn't work? + +The feature described above works best with advanced proprietary models like GPT and PaLM/Gemini. Less capable models like Llama2 or Mistral +may not be able to understand instructions passed as output_dataclasses, and in most cases the returned response won't be compatible +with the defined format, resulting in an unexpected response. + +In such cases, we recommend addressing the issue by specifying in the prompt how the response should look. Using +few-shot learning techniques is also advisable. In the case of JSON-like output, use double curly brackets to escape them in order +to use them in the JSON example. + +### How forcing the response format works under the hood +To force the model to provide output in a desired format, under the hood `llm-wrapper` automatically adds a description +of the desired output format. For example, for the `ReviewOutputDataModel` the description looks like this: +````text +The output should be formatted as a JSON instance that conforms to the JSON schema below. + +As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}} +the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted. + +Here is the output schema: +``` +{"properties": {"summary": {"title": "Summary", "description": "Summary of a product description", "type": "string"}, "should_buy": {"title": "Should Buy", "description": "Recommendation whether I should buy the product or not", "type": "boolean"}, "brand_name": {"title": "Brand Name", "description": "Brand of the coffee", "type": "string"}, "aroma": {"title": "Aroma", "description": "Description of the coffee aroma", "type": "string"}, "cons": {"title": "Cons", "description": "List of cons of the coffee", "type": "array", "items": {"type": "string"}}}, "required": ["summary", "should_buy", "brand_name", "aroma", "cons"]} +``` +```` + +This feature is really helpful, but you have to bear in mind that by using it you increase the number of prompt tokens, +so it'll make the requests more costly (if you're using a model with per-token pricing). + +If the model returns an output that doesn't conform to the defined data model, the raw model response will be returned +in `ResponseData.response` and `ResponseData.error` will be `OutputParserException`. \ No newline at end of file diff --git a/docs/tutorial/index.md b/docs/tutorial/index.md new file mode 100644 index 0000000..ac376e9 --- /dev/null +++ b/docs/tutorial/index.md @@ -0,0 +1,45 @@ +--- +layout: default +title: Tutorial +nav_order: 3 +has_children: true +--- + +## Unleash the Library's Potential + +Dive into our tutorial to fully grasp the library's capabilities. From beginners to experts, it's designed to accommodate all levels of experience. 
+ +### What You'll Gain + +- **Discover Core Features**: Learn how to harness the library's power for your projects. +- **Efficient Learning**: Save time with hands-on examples, tips, and best practices. +- **Hidden Gems**: Unlock advanced functions that can enhance your work. +- **Troubleshooting**: Find solutions to common issues. +- **Community Support**: Join a helpful user community. + +### Access the Tutorial + +Visit our documentation to access the tutorial. It's your key to maximizing the potential of the library, whether you're using it for research, development, or any other application. Start your journey now and witness the limitless possibilities it offers! + + +## Before You Begin + +All the examples presented in the tutorials assume that you pass a configuration object to the model. How to do +it is described in detail in the Quick Start section. For example, for the Azure GPT model it's done like this: +```python +from llm_wrapper.models.azure_openai import AzureOpenAIModel +from llm_wrapper.domain.configuration import AzureOpenAIConfiguration + +configuration = AzureOpenAIConfiguration( + api_key="", + base_url="", + api_version="", + deployment="", + model_name="" +) + +model = AzureOpenAIModel(config=configuration) +``` + +------- +_*Documentation Powered by GPT-3.5-turbo_ \ No newline at end of file diff --git a/docs/tutorial/single_query.md b/docs/tutorial/single_query.md new file mode 100644 index 0000000..4481648 --- /dev/null +++ b/docs/tutorial/single_query.md @@ -0,0 +1,17 @@ +--- +layout: default +title: Single query +nav_order: 0 +parent: Tutorial +--- + +In the simplest approach you just need to pass a prompt and the model will provide a response for it. + +```python +>>> model.generate("What is the capital of Poland?") +[ResponseData(response='The capital of Poland is Warsaw.', input_data=None, number_of_prompt_tokens=7, number_of_generated_tokens=7, error=None)] +``` + +As a response you'll get `List[ResponseData]`, where the first element will contain response from the model in the +`ResponseData.response` field and also information about number of prompt and generated tokens. If any error occurred +also `ResponseData.error` field will be filled with the actual exception. diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..68ef2d3 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,2 @@ +site_name: llm-wrapper +theme: readthedocs \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 72f5d5c..12118aa 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,9 +1,10 @@ -# This file is automatically @generated by Poetry 1.5.0 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.4.0 and should not be changed by hand. 
[[package]] name = "aiohttp" version = "3.9.3" description = "Async http client/server framework (asyncio)" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -100,6 +101,7 @@ speedups = ["Brotli", "aiodns", "brotlicffi"] name = "aioresponses" version = "0.7.6" description = "Mock out requests made by ClientSession from aiohttp package" +category = "main" optional = false python-versions = "*" files = [ @@ -114,6 +116,7 @@ aiohttp = ">=3.3.0,<4.0.0" name = "aiosignal" version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -128,6 +131,7 @@ frozenlist = ">=1.1.0" name = "anyio" version = "4.2.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -150,6 +154,7 @@ trio = ["trio (>=0.23)"] name = "astroid" version = "2.15.8" description = "An abstract syntax tree for Python with inference support." +category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -169,6 +174,7 @@ wrapt = [ name = "async-timeout" version = "4.0.3" description = "Timeout context manager for asyncio programs" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -180,6 +186,7 @@ files = [ name = "attrs" version = "23.2.0" description = "Classes Without Boilerplate" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -199,6 +206,7 @@ tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "p name = "cachetools" version = "5.3.2" description = "Extensible memoizing collections and decorators" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -210,6 +218,7 @@ files = [ name = "certifi" version = "2023.11.17" description = "Python package for providing Mozilla's CA Bundle." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -221,6 +230,7 @@ files = [ name = "charset-normalizer" version = "3.3.2" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -316,10 +326,26 @@ files = [ {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, ] +[[package]] +name = "click" +version = "8.1.7" +description = "Composable command line interface toolkit" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + [[package]] name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -331,6 +357,7 @@ files = [ name = "dataclasses-json" version = "0.6.3" description = "Easily serialize dataclasses to and from JSON." 
+category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -346,6 +373,7 @@ typing-inspect = ">=0.4.0,<1" name = "dill" version = "0.3.8" description = "serialize all of Python" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -361,6 +389,7 @@ profile = ["gprof2dot (>=2022.7.29)"] name = "exceptiongroup" version = "1.2.0" description = "Backport of PEP 654 (exception groups)" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -375,6 +404,7 @@ test = ["pytest (>=6)"] name = "filelock" version = "3.13.1" description = "A platform independent file lock." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -391,6 +421,7 @@ typing = ["typing-extensions (>=4.8)"] name = "frozenlist" version = "1.4.1" description = "A list-like structure which implements collections.abc.MutableSequence" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -477,6 +508,7 @@ files = [ name = "fsspec" version = "2023.12.2" description = "File-system specification" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -508,10 +540,29 @@ smb = ["smbprotocol"] ssh = ["paramiko"] tqdm = ["tqdm"] +[[package]] +name = "ghp-import" +version = "2.1.0" +description = "Copy your docs directly to the gh-pages branch." +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "ghp-import-2.1.0.tar.gz", hash = "sha256:9c535c4c61193c2df8871222567d7fd7e5014d835f97dc7b7439069e2413d343"}, + {file = "ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619"}, +] + +[package.dependencies] +python-dateutil = ">=2.8.1" + +[package.extras] +dev = ["flake8", "markdown", "twine", "wheel"] + [[package]] name = "google-api-core" version = "2.16.1" description = "Google API client core library" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -542,6 +593,7 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] name = "google-auth" version = "2.27.0" description = "Google Authentication Library" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -565,6 +617,7 @@ requests = ["requests (>=2.20.0,<3.0.0.dev0)"] name = "google-cloud-aiplatform" version = "1.38.0" description = "Vertex AI API client library" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -573,7 +626,7 @@ files = [ ] [package.dependencies] -google-api-core = {version = ">=1.32.0,<2.0.dev0 || >=2.8.dev0,<3.0.0dev", extras = ["grpc"]} +google-api-core = {version = ">=1.32.0,<2.0.0 || >=2.8.0,<3.0.0dev", extras = ["grpc"]} google-cloud-bigquery = ">=1.15.0,<4.0.0dev" google-cloud-resource-manager = ">=1.3.3,<3.0.0dev" google-cloud-storage = ">=1.32.0,<3.0.0dev" @@ -605,6 +658,7 @@ xai = ["tensorflow (>=2.3.0,<3.0.0dev)"] name = "google-cloud-bigquery" version = "3.17.1" description = "Google BigQuery API client library" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -613,7 +667,7 @@ files = [ ] [package.dependencies] -google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0dev" +google-api-core = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev" google-cloud-core = ">=1.6.0,<3.0.0dev" google-resumable-media = ">=0.6.0,<3.0dev" packaging = ">=20.0.0" @@ -635,6 +689,7 @@ tqdm = ["tqdm (>=4.7.4,<5.0.0dev)"] name = "google-cloud-core" version = "2.4.1" description = "Google Cloud API client core library" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -643,7 +698,7 
@@ files = [ ] [package.dependencies] -google-api-core = ">=1.31.6,<2.0.dev0 || >2.3.0,<3.0.0dev" +google-api-core = ">=1.31.6,<2.0.0 || >2.3.0,<3.0.0dev" google-auth = ">=1.25.0,<3.0dev" [package.extras] @@ -653,6 +708,7 @@ grpc = ["grpcio (>=1.38.0,<2.0dev)", "grpcio-status (>=1.38.0,<2.0.dev0)"] name = "google-cloud-resource-manager" version = "1.11.0" description = "Google Cloud Resource Manager API client library" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -661,7 +717,7 @@ files = [ ] [package.dependencies] -google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} +google-api-core = {version = ">=1.34.0,<2.0.0 || >=2.11.0,<3.0.0dev", extras = ["grpc"]} grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" proto-plus = ">=1.22.3,<2.0.0dev" protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" @@ -670,6 +726,7 @@ protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4 name = "google-cloud-storage" version = "2.14.0" description = "Google Cloud Storage API client library" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -678,7 +735,7 @@ files = [ ] [package.dependencies] -google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0dev" +google-api-core = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev" google-auth = ">=2.23.3,<3.0dev" google-cloud-core = ">=2.3.0,<3.0dev" google-crc32c = ">=1.0,<2.0dev" @@ -692,6 +749,7 @@ protobuf = ["protobuf (<5.0.0dev)"] name = "google-crc32c" version = "1.5.0" description = "A python wrapper of the C library 'Google CRC32C'" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -772,6 +830,7 @@ testing = ["pytest"] name = "google-resumable-media" version = "2.7.0" description = "Utilities for Google Media Downloads and Resumable Uploads" +category = "main" optional = false python-versions = ">= 3.7" files = [ @@ -790,6 +849,7 @@ requests = ["requests (>=2.18.0,<3.0.0dev)"] name = "googleapis-common-protos" version = "1.62.0" description = "Common protobufs used in Google APIs" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -808,6 +868,7 @@ grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] name = "greenlet" version = "3.0.3" description = "Lightweight in-process concurrent programming" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -879,6 +940,7 @@ test = ["objgraph", "psutil"] name = "grpc-google-iam-v1" version = "0.13.0" description = "IAM API client library" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -895,6 +957,7 @@ protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4 name = "grpcio" version = "1.60.0" description = "HTTP/2-based RPC framework" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -961,6 +1024,7 @@ protobuf = ["grpcio-tools (>=1.60.0)"] name = "grpcio-status" version = "1.60.0" description = "Status proto mapping for gRPC" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -977,6 +1041,7 @@ protobuf = ">=4.21.6" name = "huggingface-hub" version = "0.20.3" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" +category = "main" optional = false python-versions = ">=3.8.0" files = [ @@ -1009,6 +1074,7 @@ typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "t 
name = "idna" version = "3.6" description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1020,6 +1086,7 @@ files = [ name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1031,6 +1098,7 @@ files = [ name = "isort" version = "5.13.2" description = "A Python utility / library to sort Python imports." +category = "dev" optional = false python-versions = ">=3.8.0" files = [ @@ -1041,10 +1109,29 @@ files = [ [package.extras] colors = ["colorama (>=0.4.6)"] +[[package]] +name = "jinja2" +version = "3.1.3" +description = "A very fast and expressive template engine." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"}, + {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"}, +] + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + [[package]] name = "jsonpatch" version = "1.33" description = "Apply JSON-Patches (RFC 6902)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ @@ -1059,6 +1146,7 @@ jsonpointer = ">=1.9" name = "jsonpointer" version = "2.4" description = "Identify specific nodes in a JSON document (RFC 6901)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ @@ -1070,6 +1158,7 @@ files = [ name = "langchain" version = "0.0.351" description = "Building applications with LLMs through composability" +category = "main" optional = false python-versions = ">=3.8.1,<4.0" files = [ @@ -1110,6 +1199,7 @@ text-helpers = ["chardet (>=5.1.0,<6.0.0)"] name = "langchain-community" version = "0.0.16" description = "Community contributed LangChain integrations." +category = "main" optional = false python-versions = ">=3.8.1,<4.0" files = [ @@ -1136,6 +1226,7 @@ extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15. name = "langchain-core" version = "0.1.17" description = "Building applications with LLMs through composability" +category = "main" optional = false python-versions = ">=3.8.1,<4.0" files = [ @@ -1160,6 +1251,7 @@ extended-testing = ["jinja2 (>=3,<4)"] name = "langsmith" version = "0.0.85" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." +category = "main" optional = false python-versions = ">=3.8.1,<4.0" files = [ @@ -1175,6 +1267,7 @@ requests = ">=2,<3" name = "lazy-object-proxy" version = "1.10.0" description = "A fast and thorough lazy object proxy." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1217,10 +1310,97 @@ files = [ {file = "lazy_object_proxy-1.10.0-pp310.pp311.pp312.pp38.pp39-none-any.whl", hash = "sha256:80fa48bd89c8f2f456fc0765c11c23bf5af827febacd2f523ca5bc1893fcc09d"}, ] +[[package]] +name = "markdown" +version = "3.5.2" +description = "Python implementation of John Gruber's Markdown." 
+category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "Markdown-3.5.2-py3-none-any.whl", hash = "sha256:d43323865d89fc0cb9b20c75fc8ad313af307cc087e84b657d9eec768eddeadd"}, + {file = "Markdown-3.5.2.tar.gz", hash = "sha256:e1ac7b3dc550ee80e602e71c1d168002f062e49f1b11e26a36264dafd4df2ef8"}, +] + +[package.extras] +docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"] +testing = ["coverage", "pyyaml"] + +[[package]] +name = "markupsafe" +version = "2.1.5" +description = "Safely add untrusted strings to HTML/XML markup." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-win32.whl", hash = "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-win_amd64.whl", hash = "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5"}, + {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, +] + [[package]] name = "marshmallow" version = "3.20.2" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." 
+category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1241,6 +1421,7 @@ tests = ["pytest", "pytz", "simplejson"] name = "mccabe" version = "0.7.0" description = "McCabe checker, plugin for flake8" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1248,10 +1429,54 @@ files = [ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] +[[package]] +name = "mergedeep" +version = "1.3.4" +description = "A deep merge function for 🐍." +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "mergedeep-1.3.4-py3-none-any.whl", hash = "sha256:70775750742b25c0d8f36c55aed03d24c3384d17c951b3175d898bd778ef0307"}, + {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"}, +] + +[[package]] +name = "mkdocs" +version = "1.5.3" +description = "Project documentation with Markdown." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mkdocs-1.5.3-py3-none-any.whl", hash = "sha256:3b3a78e736b31158d64dbb2f8ba29bd46a379d0c6e324c2246c3bc3d2189cfc1"}, + {file = "mkdocs-1.5.3.tar.gz", hash = "sha256:eb7c99214dcb945313ba30426c2451b735992c73c2e10838f76d09e39ff4d0e2"}, +] + +[package.dependencies] +click = ">=7.0" +colorama = {version = ">=0.4", markers = "platform_system == \"Windows\""} +ghp-import = ">=1.0" +jinja2 = ">=2.11.1" +markdown = ">=3.2.1" +markupsafe = ">=2.0.1" +mergedeep = ">=1.3.4" +packaging = ">=20.5" +pathspec = ">=0.11.1" +platformdirs = ">=2.2.0" +pyyaml = ">=5.1" +pyyaml-env-tag = ">=0.1" +watchdog = ">=2.0" + +[package.extras] +i18n = ["babel (>=2.9.0)"] +min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.3)", "jinja2 (==2.11.1)", "markdown (==3.2.1)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "packaging (==20.5)", "pathspec (==0.11.1)", "platformdirs (==2.2.0)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "typing-extensions (==3.10)", "watchdog (==2.0)"] + [[package]] name = "multidict" version = "6.0.4" description = "multidict implementation" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1335,6 +1560,7 @@ files = [ name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." 
+category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1346,6 +1572,7 @@ files = [ name = "numpy" version = "1.26.3" description = "Fundamental package for array computing in Python" +category = "main" optional = false python-versions = ">=3.9" files = [ @@ -1391,6 +1618,7 @@ files = [ name = "openai" version = "0.27.10" description = "Python client library for the OpenAI API" +category = "main" optional = false python-versions = ">=3.7.1" files = [ @@ -1405,7 +1633,7 @@ tqdm = "*" [package.extras] datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] -dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-mock"] +dev = ["black (>=21.6b0,<22.0)", "pytest (>=6.0.0,<7.0.0)", "pytest-asyncio", "pytest-mock"] embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] @@ -1413,6 +1641,7 @@ wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1 name = "packaging" version = "23.2" description = "Core utilities for Python packages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1424,6 +1653,7 @@ files = [ name = "pandas" version = "2.2.0" description = "Powerful data structures for data analysis, time series, and statistics" +category = "main" optional = false python-versions = ">=3.9" files = [ @@ -1492,10 +1722,23 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.9.2)"] +[[package]] +name = "pathspec" +version = "0.12.1" +description = "Utility library for gitignore style pattern matching of file paths." +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, + {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, +] + [[package]] name = "platformdirs" version = "4.2.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1511,6 +1754,7 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest- name = "pluggy" version = "1.4.0" description = "plugin and hook calling mechanisms for python" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1526,6 +1770,7 @@ testing = ["pytest", "pytest-benchmark"] name = "proto-plus" version = "1.23.0" description = "Beautiful, Pythonic protocol buffers." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1543,6 +1788,7 @@ testing = ["google-api-core[grpc] (>=1.31.5)"] name = "protobuf" version = "4.25.2" description = "" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1563,6 +1809,7 @@ files = [ name = "pyasn1" version = "0.5.1" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -1574,6 +1821,7 @@ files = [ name = "pyasn1-modules" version = "0.3.0" description = "A collection of ASN.1-based protocols modules" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -1588,6 +1836,7 @@ pyasn1 = ">=0.4.6,<0.6.0" name = "pydantic" version = "1.10.13" description = "Data validation and settings management using python type hints" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1640,6 +1889,7 @@ email = ["email-validator (>=1.0.3)"] name = "pydash" version = "7.0.7" description = "The kitchen sink of Python utility libraries for doing \"stuff\" in a functional way. Based on the Lo-Dash Javascript library." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1657,6 +1907,7 @@ dev = ["black", "build", "coverage", "docformatter", "flake8", "flake8-black", " name = "pylint" version = "2.17.7" description = "python code static checker" +category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -1685,6 +1936,7 @@ testutils = ["gitpython (>3)"] name = "pytest" version = "7.4.4" description = "pytest: simple powerful testing with Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1707,6 +1959,7 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -1721,6 +1974,7 @@ six = ">=1.5" name = "pytz" version = "2023.4" description = "World timezone definitions, modern and historical" +category = "main" optional = false python-versions = "*" files = [ @@ -1732,6 +1986,7 @@ files = [ name = "pyyaml" version = "6.0.1" description = "YAML parser and emitter for Python" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1787,10 +2042,26 @@ files = [ {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, ] +[[package]] +name = "pyyaml-env-tag" +version = "0.1" +description = "A custom YAML tag for referencing environment variables in YAML files. " +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pyyaml_env_tag-0.1-py3-none-any.whl", hash = "sha256:af31106dec8a4d68c60207c1886031cbf839b68aa7abccdb19868200532c2069"}, + {file = "pyyaml_env_tag-0.1.tar.gz", hash = "sha256:70092675bda14fdec33b31ba77e7543de9ddc88f2e5b99160396572d11525bdb"}, +] + +[package.dependencies] +pyyaml = "*" + [[package]] name = "regex" version = "2023.12.25" description = "Alternative regular expression module, to replace re." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1893,6 +2164,7 @@ files = [ name = "requests" version = "2.31.0" description = "Python HTTP for Humans." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1914,6 +2186,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "rsa" version = "4.9" description = "Pure-Python RSA implementation" +category = "main" optional = false python-versions = ">=3.6,<4" files = [ @@ -1928,6 +2201,7 @@ pyasn1 = ">=0.1.3" name = "safetensors" version = "0.4.2" description = "" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2060,6 +2334,7 @@ torch = ["safetensors[numpy]", "torch (>=1.10)"] name = "setuptools" version = "69.0.3" description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2076,6 +2351,7 @@ testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jar name = "shapely" version = "2.0.2" description = "Manipulation and analysis of geometric objects" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2126,13 +2402,14 @@ files = [ numpy = ">=1.14" [package.extras] -docs = ["matplotlib", "numpydoc (==1.1.*)", "sphinx", "sphinx-book-theme", "sphinx-remove-toctrees"] +docs = ["matplotlib", "numpydoc (>=1.1.0,<1.2.0)", "sphinx", "sphinx-book-theme", "sphinx-remove-toctrees"] test = ["pytest", "pytest-cov"] [[package]] name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -2144,6 +2421,7 @@ files = [ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2155,6 +2433,7 @@ files = [ name = "sqlalchemy" version = "2.0.25" description = "Database Abstraction Library" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2210,7 +2489,7 @@ files = [ ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\""} +greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} typing-extensions = ">=4.6.0" [package.extras] @@ -2242,6 +2521,7 @@ sqlcipher = ["sqlcipher3_binary"] name = "tenacity" version = "8.2.3" description = "Retry code until it succeeds" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2256,6 +2536,7 @@ doc = ["reno", "sphinx", "tornado (>=4.5)"] name = "tiktoken" version = "0.4.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2301,6 +2582,7 @@ blobfile = ["blobfile (>=2)"] name = "tokenizers" version = "0.15.1" description = "" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2428,6 +2710,7 @@ testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] name = "tomli" version = "2.0.1" description = "A lil' TOML parser" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2439,6 +2722,7 @@ files = [ name = "tomlkit" version = "0.12.3" description = "Style preserving 
TOML library" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2450,6 +2734,7 @@ files = [ name = "tqdm" version = "4.66.1" description = "Fast, Extensible Progress Meter" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2470,6 +2755,7 @@ telegram = ["requests"] name = "transformers" version = "4.37.2" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" +category = "main" optional = false python-versions = ">=3.8.0" files = [ @@ -2538,6 +2824,7 @@ vision = ["Pillow (>=10.0.1,<=15.0)"] name = "typing-extensions" version = "4.9.0" description = "Backported and Experimental Type Hints for Python 3.8+" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2549,6 +2836,7 @@ files = [ name = "typing-inspect" version = "0.9.0" description = "Runtime inspection utilities for typing module." +category = "main" optional = false python-versions = "*" files = [ @@ -2564,6 +2852,7 @@ typing-extensions = ">=3.7.4" name = "tzdata" version = "2023.4" description = "Provider of IANA time zone data" +category = "main" optional = false python-versions = ">=2" files = [ @@ -2575,6 +2864,7 @@ files = [ name = "urllib3" version = "2.2.0" description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2588,10 +2878,53 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "watchdog" +version = "4.0.0" +description = "Filesystem events monitoring" +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "watchdog-4.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:39cb34b1f1afbf23e9562501673e7146777efe95da24fab5707b88f7fb11649b"}, + {file = "watchdog-4.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c522392acc5e962bcac3b22b9592493ffd06d1fc5d755954e6be9f4990de932b"}, + {file = "watchdog-4.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6c47bdd680009b11c9ac382163e05ca43baf4127954c5f6d0250e7d772d2b80c"}, + {file = "watchdog-4.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8350d4055505412a426b6ad8c521bc7d367d1637a762c70fdd93a3a0d595990b"}, + {file = "watchdog-4.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c17d98799f32e3f55f181f19dd2021d762eb38fdd381b4a748b9f5a36738e935"}, + {file = "watchdog-4.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4986db5e8880b0e6b7cd52ba36255d4793bf5cdc95bd6264806c233173b1ec0b"}, + {file = "watchdog-4.0.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:11e12fafb13372e18ca1bbf12d50f593e7280646687463dd47730fd4f4d5d257"}, + {file = "watchdog-4.0.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5369136a6474678e02426bd984466343924d1df8e2fd94a9b443cb7e3aa20d19"}, + {file = "watchdog-4.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76ad8484379695f3fe46228962017a7e1337e9acadafed67eb20aabb175df98b"}, + {file = "watchdog-4.0.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:45cc09cc4c3b43fb10b59ef4d07318d9a3ecdbff03abd2e36e77b6dd9f9a5c85"}, + {file = "watchdog-4.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:eed82cdf79cd7f0232e2fdc1ad05b06a5e102a43e331f7d041e5f0e0a34a51c4"}, + {file = "watchdog-4.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba30a896166f0fee83183cec913298151b73164160d965af2e93a20bbd2ab605"}, + {file = "watchdog-4.0.0-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:d18d7f18a47de6863cd480734613502904611730f8def45fc52a5d97503e5101"}, + {file = "watchdog-4.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2895bf0518361a9728773083908801a376743bcc37dfa252b801af8fd281b1ca"}, + {file = "watchdog-4.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:87e9df830022488e235dd601478c15ad73a0389628588ba0b028cb74eb72fed8"}, + {file = "watchdog-4.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6e949a8a94186bced05b6508faa61b7adacc911115664ccb1923b9ad1f1ccf7b"}, + {file = "watchdog-4.0.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6a4db54edea37d1058b08947c789a2354ee02972ed5d1e0dca9b0b820f4c7f92"}, + {file = "watchdog-4.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d31481ccf4694a8416b681544c23bd271f5a123162ab603c7d7d2dd7dd901a07"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8fec441f5adcf81dd240a5fe78e3d83767999771630b5ddfc5867827a34fa3d3"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:6a9c71a0b02985b4b0b6d14b875a6c86ddea2fdbebd0c9a720a806a8bbffc69f"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:557ba04c816d23ce98a06e70af6abaa0485f6d94994ec78a42b05d1c03dcbd50"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:d0f9bd1fd919134d459d8abf954f63886745f4660ef66480b9d753a7c9d40927"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:f9b2fdca47dc855516b2d66eef3c39f2672cbf7e7a42e7e67ad2cbfcd6ba107d"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:73c7a935e62033bd5e8f0da33a4dcb763da2361921a69a5a95aaf6c93aa03a87"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6a80d5cae8c265842c7419c560b9961561556c4361b297b4c431903f8c33b269"}, + {file = "watchdog-4.0.0-py3-none-win32.whl", hash = "sha256:8f9a542c979df62098ae9c58b19e03ad3df1c9d8c6895d96c0d51da17b243b1c"}, + {file = "watchdog-4.0.0-py3-none-win_amd64.whl", hash = "sha256:f970663fa4f7e80401a7b0cbeec00fa801bf0287d93d48368fc3e6fa32716245"}, + {file = "watchdog-4.0.0-py3-none-win_ia64.whl", hash = "sha256:9a03e16e55465177d416699331b0f3564138f1807ecc5f2de9d55d8f188d08c7"}, + {file = "watchdog-4.0.0.tar.gz", hash = "sha256:e3e7065cbdabe6183ab82199d7a4f6b3ba0a438c5a512a68559846ccb76a78ec"}, +] + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + [[package]] name = "wrapt" version = "1.16.0" description = "Module for decorators, wrappers and monkey patching." +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2671,6 +3004,7 @@ files = [ name = "yarl" version = "1.9.4" description = "Yet another URL library" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2773,4 +3107,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "b65b9b32640a832138e5ba05fd3617f79f299d381a7a11c4864b69419afe6c5f" +content-hash = "29b96b21c1ea831a18a30318a11264dd4e7ffd5706565329b64477aaa35d568e" diff --git a/pyproject.toml b/pyproject.toml index 8944546..b299d30 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ aioresponses = "^0.7.6" [tool.poetry.group.dev.dependencies] pytest = "^7.4.0" pylint = "^2.17.4" +mkdocs = "^1.5.3" [build-system] requires = ["poetry-core"]