From 673d6bd6d4406e1c16b85498240771515b39bce7 Mon Sep 17 00:00:00 2001
From: codebanesr
Date: Tue, 26 Sep 2023 02:41:18 +0300
Subject: [PATCH 01/14] Use prance to resolve refs; this removes a lot of
 unnecessary code

---
 .github/workflows/build-docker.yml | 54 +++++++++++++++---------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml
index 997f54f32..051d1c505 100644
--- a/.github/workflows/build-docker.yml
+++ b/.github/workflows/build-docker.yml
@@ -1,38 +1,38 @@
-name: Docker Build
+# name: Docker Build
 
 on:
   push:
     branches:
       - disabled
 
-jobs:
-  build:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os: [ubuntu-latest, macos-latest]
+# jobs:
+#   build:
+#     runs-on: ${{ matrix.os }}
+#     strategy:
+#       matrix:
+#         os: [ubuntu-latest, macos-latest]
 
-    steps:
-      - name: Checkout Code
-        uses: actions/checkout@v2
+#     steps:
+#       - name: Checkout Code
+#         uses: actions/checkout@v2
 
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v1
+#       - name: Set up Docker Buildx
+#         uses: docker/setup-buildx-action@v1
 
-      - name: Build and Push Images
-        run: |
-          IMAGE_NAME="opencopilot/${{ matrix.os }}-backend"
-          docker buildx create --use
-          docker buildx build --platform linux/amd64,linux/arm64 -t $IMAGE_NAME backend/
-          echo ${{ secrets.DOCKER_PASSWORD }} | docker login -u ${{ secrets.DOCKER_USERNAME }} --password-stdin
-          docker push $IMAGE_NAME
+#       - name: Build and Push Images
+#         run: |
+#           IMAGE_NAME="opencopilot/${{ matrix.os }}-backend"
+#           docker buildx create --use
+#           docker buildx build --platform linux/amd64,linux/arm64 -t $IMAGE_NAME backend/
+#           echo ${{ secrets.DOCKER_PASSWORD }} | docker login -u ${{ secrets.DOCKER_USERNAME }} --password-stdin
+#           docker push $IMAGE_NAME
 
-          IMAGE_NAME="opencopilot/${{ matrix.os }}-llm-server"
-          docker buildx create --use
-          docker buildx build --platform linux/amd64,linux/arm64 -t $IMAGE_NAME llm-server/
-          echo ${{ secrets.DOCKER_PASSWORD }} | docker login -u ${{ secrets.DOCKER_USERNAME }} --password-stdin
-          docker push $IMAGE_NAME
+#           IMAGE_NAME="opencopilot/${{ matrix.os }}-llm-server"
+#           docker buildx create --use
+#           docker buildx build --platform linux/amd64,linux/arm64 -t $IMAGE_NAME llm-server/
+#           echo ${{ secrets.DOCKER_PASSWORD }} | docker login -u ${{ secrets.DOCKER_USERNAME }} --password-stdin
+#           docker push $IMAGE_NAME
 
-    env:
-      DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
-      DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
+#     env:
+#       DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
+#       DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}

From 272e1b2b734b31cb90d0d44f776b485d1f786262 Mon Sep 17 00:00:00 2001
From: codebanesr
Date: Tue, 26 Sep 2023 20:00:32 +0300
Subject: [PATCH 02/14] Add swagger parsers to enable parsing of Swagger 2.0
 specs as well

---
 .github/workflows/build-docker.yml           |  54 +--
 llm-server/notebooks/openapi.ipynb           | 141 --------
 llm-server/notebooks/swagger_examples.ipynb  | 340 ++++++++++++++++++
 .../workflow/extractors/example_generator.py |   5 +-
 .../workflow/extractors/extract_body.py      |   2 +-
 .../workflow/extractors/extract_param.py     |   2 +-
 .../workflow/generate_openapi_payload.py     | 269 ++++----------
 .../routes/workflow/workflow_service.py      |   3 +-
 llm-server/utils/make_api_call.py            |  10 +-
 9 files changed, 456 insertions(+), 370 deletions(-)
 delete mode 100644 llm-server/notebooks/openapi.ipynb
 create mode 100644 llm-server/notebooks/swagger_examples.ipynb

diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml
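For orientation on this patch: prance's ResolvingParser inlines every $ref while it
parses, which is what lets the hand-written reference-resolution helpers be deleted
further down. A minimal sketch of the idea, assuming network access to the petstore
spec that the notebooks below also use:

    from prance import ResolvingParser

    # Parse the spec and resolve all $ref pointers in one pass; the result is
    # a plain dict with fully inlined schemas under parser.specification.
    parser = ResolvingParser("https://petstore3.swagger.io/api/v3/openapi.json")
    print(parser.specification["openapi"])  # e.g. '3.0.2'
    print(parser.version_parsed)            # e.g. (3, 0, 2)
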
index 051d1c505..997f54f32 100644 --- a/.github/workflows/build-docker.yml +++ b/.github/workflows/build-docker.yml @@ -1,38 +1,38 @@ -# name: Docker Build +name: Docker Build on: push: branches: - disabled -# jobs: -# build: -# runs-on: ${{ matrix.os }} -# strategy: -# matrix: -# os: [ubuntu-latest, macos-latest] +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest] -# steps: -# - name: Checkout Code -# uses: actions/checkout@v2 + steps: + - name: Checkout Code + uses: actions/checkout@v2 -# - name: Set up Docker Buildx -# uses: docker/setup-buildx-action@v1 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 -# - name: Build and Push Images -# run: | -# IMAGE_NAME="opencopilot/${{ matrix.os }}-backend" -# docker buildx create --use -# docker buildx build --platform linux/amd64,linux/arm64 -t $IMAGE_NAME backend/ -# echo ${{ secrets.DOCKER_PASSWORD }} | docker login -u ${{ secrets.DOCKER_USERNAME }} --password-stdin -# docker push $IMAGE_NAME + - name: Build and Push Images + run: | + IMAGE_NAME="opencopilot/${{ matrix.os }}-backend" + docker buildx create --use + docker buildx build --platform linux/amd64,linux/arm64 -t $IMAGE_NAME backend/ + echo ${{ secrets.DOCKER_PASSWORD }} | docker login -u ${{ secrets.DOCKER_USERNAME }} --password-stdin + docker push $IMAGE_NAME -# IMAGE_NAME="opencopilot/${{ matrix.os }}-llm-server" -# docker buildx create --use -# docker buildx build --platform linux/amd64,linux/arm64 -t $IMAGE_NAME llm-server/ -# echo ${{ secrets.DOCKER_PASSWORD }} | docker login -u ${{ secrets.DOCKER_USERNAME }} --password-stdin -# docker push $IMAGE_NAME + IMAGE_NAME="opencopilot/${{ matrix.os }}-llm-server" + docker buildx create --use + docker buildx build --platform linux/amd64,linux/arm64 -t $IMAGE_NAME llm-server/ + echo ${{ secrets.DOCKER_PASSWORD }} | docker login -u ${{ secrets.DOCKER_USERNAME }} --password-stdin + docker push $IMAGE_NAME -# env: -# DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} -# DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} + env: + DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} + DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} diff --git a/llm-server/notebooks/openapi.ipynb b/llm-server/notebooks/openapi.ipynb deleted file mode 100644 index e6d8f02d2..000000000 --- a/llm-server/notebooks/openapi.ipynb +++ /dev/null @@ -1,141 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### FEW SHOT PROMPT EXAMPLE\n", - "Automatically derive the json from the schema, and use it as an example for the large language model to construct the next payload using prev conversations" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from typing import Any, Dict, List, Union\n", - "import json\n", - "from jsonschema import Draft7Validator, exceptions\n", - "from faker import Faker\n", - "import random\n", - "\n", - "fake = Faker()\n", - "\n", - "def generate_example_json(schema: Dict[str, Any], num_items: int = 1, include_optional: bool = True) -> Union[Dict[str, Any], List[Dict[str, Any]]]:\n", - " def generate_example_property(property_schema: Dict[str, Any], required: bool = True) -> Any:\n", - " if \"example\" in property_schema:\n", - " return property_schema[\"example\"]\n", - "\n", - " if \"type\" in property_schema:\n", - " if \"format\" in property_schema:\n", - " return generate_example_with_format(property_schema)\n", - " elif property_schema[\"type\"] == \"object\":\n", - " example_property: 
Dict[str, Any] = {}\n", - " if \"properties\" in property_schema:\n", - " for prop_name, prop_schema in property_schema[\"properties\"].items():\n", - " # Check if property is required in the schema\n", - " is_required = required and prop_name in schema.get(\"required\", [])\n", - " if is_required or include_optional:\n", - " example_property[prop_name] = generate_example_property(prop_schema, is_required)\n", - " return example_property\n", - " elif property_schema[\"type\"] == \"array\":\n", - " example_property = []\n", - " if \"items\" in property_schema:\n", - " for _ in range(num_items):\n", - " example_property.append(generate_example_property(property_schema[\"items\"]))\n", - " return example_property\n", - " elif property_schema[\"type\"] == \"string\":\n", - " if \"enum\" in property_schema:\n", - " return random.choice(property_schema[\"enum\"])\n", - " else:\n", - " return fake.word()\n", - " elif property_schema[\"type\"] == \"integer\":\n", - " return fake.random_int(min=0, max=100)\n", - " elif property_schema[\"type\"] == \"number\":\n", - " return fake.random_number(decimals=2, min_value=0, max_value=100)\n", - " elif property_schema[\"type\"] == \"boolean\":\n", - " return fake.boolean()\n", - " elif property_schema[\"type\"] == \"null\":\n", - " return None\n", - "\n", - " def generate_example_with_format(property_schema: Dict[str, Any]) -> Any:\n", - " format_type = property_schema[\"format\"]\n", - " \n", - " if format_type == \"date-time\":\n", - " return fake.iso8601()\n", - " elif format_type == \"date\":\n", - " return fake.date()\n", - " elif format_type == \"int64\":\n", - " return fake.random_int(min=0, max=9223372036854775807)\n", - " elif format_type == \"int32\":\n", - " return fake.random_int(min=0, max=2147483647)\n", - " else:\n", - " return fake.word()\n", - "\n", - " example_json: Union[Dict[str, Any], List[Dict[str, Any]]] = {}\n", - " \n", - " # Handle root-level arrays\n", - " if schema.get(\"type\") == \"array\":\n", - " example_json = []\n", - " for _ in range(num_items):\n", - " example_json.append(generate_example_property(schema[\"items\"]))\n", - " else:\n", - " for prop_name, prop_schema in schema[\"properties\"].items():\n", - " # Check if property is required in the schema\n", - " is_required = prop_name in schema.get(\"required\", [])\n", - " if is_required or include_optional:\n", - " example_json[prop_name] = generate_example_property(prop_schema, is_required)\n", - "\n", - " return example_json\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'id': 10,\n", - " 'petId': 198772,\n", - " 'quantity': 7,\n", - " 'shipDate': '2012-06-24T10:55:10',\n", - " 'status': 'approved',\n", - " 'complete': True}" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "generate_example_json({'properties': {'id': {'type': 'integer', 'format': 'int64', 'example': 10}, 'petId': {'type': 'integer', 'format': 'int64', 'example': 198772}, 'quantity': {'type': 'integer', 'format': 'int32', 'example': 7}, 'shipDate': {'type': 'string', 'format': 'date-time'}, 'status': {'type': 'string', 'description': 'Order Status', 'example': 'approved', 'enum': ['placed', 'approved', 'delivered']}, 'complete': {'type': 'boolean'}}})" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, 
- "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.17" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/llm-server/notebooks/swagger_examples.ipynb b/llm-server/notebooks/swagger_examples.ipynb new file mode 100644 index 000000000..3cfe3e436 --- /dev/null +++ b/llm-server/notebooks/swagger_examples.ipynb @@ -0,0 +1,340 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### SWAGGER FUNCTIONS\n", + "Automatically derive the json from the schema, and use it as an example for the large language model to construct the next payload using prev conversations" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: prance in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (23.6.21.0)\n", + "Requirement already satisfied: chardet>=3.0 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from prance) (4.0.0)\n", + "Requirement already satisfied: ruamel.yaml>=0.17.10 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from prance) (0.17.21)\n", + "Requirement already satisfied: requests>=2.25 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from prance) (2.31.0)\n", + "Requirement already satisfied: six~=1.15 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from prance) (1.16.0)\n", + "Requirement already satisfied: packaging>=21.3 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from prance) (23.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from requests>=2.25->prance) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from requests>=2.25->prance) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from requests>=2.25->prance) (1.26.16)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from requests>=2.25->prance) (2023.7.22)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install prance" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import prance" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from prance import ResolvingParser" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: openapi-spec-validator in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (0.6.0)\n", + "Requirement already satisfied: jsonschema<5.0.0,>=4.18.0 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from openapi-spec-validator) (4.19.1)\n", + "Requirement already satisfied: jsonschema-spec<0.3.0,>=0.2.3 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from openapi-spec-validator) (0.2.4)\n", + "Requirement already satisfied: lazy-object-proxy<2.0.0,>=1.7.1 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from openapi-spec-validator) (1.9.0)\n", + "Requirement already 
satisfied: openapi-schema-validator<0.7.0,>=0.6.0 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from openapi-spec-validator) (0.6.1)\n", + "Requirement already satisfied: attrs>=22.2.0 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from jsonschema<5.0.0,>=4.18.0->openapi-spec-validator) (23.1.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from jsonschema<5.0.0,>=4.18.0->openapi-spec-validator) (2023.7.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from jsonschema<5.0.0,>=4.18.0->openapi-spec-validator) (0.30.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from jsonschema<5.0.0,>=4.18.0->openapi-spec-validator) (0.10.2)\n", + "Requirement already satisfied: PyYAML>=5.1 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from jsonschema-spec<0.3.0,>=0.2.3->openapi-spec-validator) (6.0)\n", + "Requirement already satisfied: pathable<0.5.0,>=0.4.1 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from jsonschema-spec<0.3.0,>=0.2.3->openapi-spec-validator) (0.4.3)\n", + "Requirement already satisfied: requests<3.0.0,>=2.31.0 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from jsonschema-spec<0.3.0,>=0.2.3->openapi-spec-validator) (2.31.0)\n", + "Requirement already satisfied: rfc3339-validator in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from openapi-schema-validator<0.7.0,>=0.6.0->openapi-spec-validator) (0.1.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from requests<3.0.0,>=2.31.0->jsonschema-spec<0.3.0,>=0.2.3->openapi-spec-validator) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from requests<3.0.0,>=2.31.0->jsonschema-spec<0.3.0,>=0.2.3->openapi-spec-validator) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from requests<3.0.0,>=2.31.0->jsonschema-spec<0.3.0,>=0.2.3->openapi-spec-validator) (1.26.16)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from requests<3.0.0,>=2.31.0->jsonschema-spec<0.3.0,>=0.2.3->openapi-spec-validator) (2023.7.22)\n", + "Requirement already satisfied: six in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from rfc3339-validator->openapi-schema-validator<0.7.0,>=0.6.0->openapi-spec-validator) (1.16.0)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install openapi-spec-validator" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "parser = ResolvingParser('https://petstore3.swagger.io/api/v3/openapi.json')" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'3.0.2'" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "parser.specification[\"openapi\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'url': '/api/v3'}]" + ] + }, + "execution_count": 30, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "parser.specification[\"servers\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Dict, Union, Optional\n", + "\n", + "def get_api_info_by_operation_id(data: Dict[str, Dict[str, dict]], target_operation_id: str) -> Dict[str, Union[str, dict, Optional[Dict[str, dict]]]]:\n", + " api_info = {\n", + " \"endpoint\": None,\n", + " \"method\": None,\n", + " \"path_params\": {},\n", + " \"query_params\": {},\n", + " \"body_schema\": None\n", + " }\n", + "\n", + " for path, methods in data[\"paths\"].items():\n", + " for method, details in methods.items():\n", + " if \"operationId\" in details and details[\"operationId\"] == target_operation_id:\n", + " # Extract endpoint and method\n", + " api_info[\"endpoint\"] = path\n", + " api_info[\"method\"] = method.upper()\n", + "\n", + " # Extract path parameters and their schemas\n", + " path_params = {}\n", + " for parameter in details.get(\"parameters\", []):\n", + " if parameter[\"in\"] == \"path\":\n", + " param_name = parameter[\"name\"]\n", + " param_schema = parameter.get(\"schema\", {})\n", + " path_params[param_name] = param_schema\n", + " api_info[\"path_params\"] = path_params\n", + "\n", + " # Extract query parameters and their schemas\n", + " query_params = {}\n", + " for parameter in details.get(\"parameters\", []):\n", + " if parameter[\"in\"] == \"query\":\n", + " param_name = parameter[\"name\"]\n", + " param_schema = parameter.get(\"schema\", {})\n", + " query_params[param_name] = param_schema\n", + " api_info[\"query_params\"] = query_params\n", + "\n", + " # Extract request body schema\n", + " if \"requestBody\" in details:\n", + " request_body = details[\"requestBody\"]\n", + " if \"content\" in request_body and \"application/json\" in request_body[\"content\"]:\n", + " api_info[\"body_schema\"] = request_body[\"content\"][\"application/json\"][\"schema\"]\n", + "\n", + " return api_info" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'endpoint': '/pet/findByStatus',\n", + " 'method': 'GET',\n", + " 'path_params': {},\n", + " 'query_params': {'status': {'type': 'string',\n", + " 'default': 'available',\n", + " 'enum': ['available', 'pending', 'sold']}},\n", + " 'body_schema': None}" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_api_info_by_operation_id(parser.specification, 'findPetsByStatus')" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'endpoint': '/pet/{petId}',\n", + " 'method': 'GET',\n", + " 'path_params': {'petId': {'type': 'integer', 'format': 'int64'}},\n", + " 'query_params': {},\n", + " 'body_schema': None}" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_api_info_by_operation_id(parser.specification, 'getPetById')" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'endpoint': '/pet/{petId}',\n", + " 'method': 'POST',\n", + " 'path_params': {'petId': {'type': 'integer', 'format': 'int64'}},\n", + " 'query_params': {'name': {'type': 'string'}, 'status': {'type': 
'string'}},\n", + " 'body_schema': None}" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_api_info_by_operation_id(parser.specification, 'updatePetWithForm')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'endpoint': '/user/{username}',\n", + " 'method': 'PUT',\n", + " 'path_params': {'username': {'type': 'string'}},\n", + " 'query_params': {},\n", + " 'body_schema': {'type': 'object',\n", + " 'properties': {'id': {'type': 'integer', 'format': 'int64', 'example': 10},\n", + " 'username': {'type': 'string', 'example': 'theUser'},\n", + " 'firstName': {'type': 'string', 'example': 'John'},\n", + " 'lastName': {'type': 'string', 'example': 'James'},\n", + " 'email': {'type': 'string', 'example': 'john@email.com'},\n", + " 'password': {'type': 'string', 'example': '12345'},\n", + " 'phone': {'type': 'string', 'example': '12345'},\n", + " 'userStatus': {'type': 'integer',\n", + " 'description': 'User Status',\n", + " 'format': 'int32',\n", + " 'example': 1}},\n", + " 'xml': {'name': 'user'}}}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_api_info_by_operation_id(json.loads(t), 'updateUser')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/llm-server/routes/workflow/extractors/example_generator.py b/llm-server/routes/workflow/extractors/example_generator.py index 51324e5f0..f382bcf6f 100644 --- a/llm-server/routes/workflow/extractors/example_generator.py +++ b/llm-server/routes/workflow/extractors/example_generator.py @@ -95,8 +95,5 @@ def generate_example_with_format(property_schema: Dict[str, Any]) -> Any: return example_json -# This function can be used to generate an example value for llm, incase the response is not correct -# Also to be used when integrating tools as openai functions, the json output should be used to generate openai function dict -def generate_example_from_schema(input: Any) -> Any: - schema = input["requestBody"]["content"]["application/json"]["schema"] +def gen_ex_from_schema(schema: Any) -> Any: return json.dumps(generate_example_json(schema)) diff --git a/llm-server/routes/workflow/extractors/extract_body.py b/llm-server/routes/workflow/extractors/extract_body.py index c820340ed..1573b4608 100644 --- a/llm-server/routes/workflow/extractors/extract_body.py +++ b/llm-server/routes/workflow/extractors/extract_body.py @@ -11,7 +11,7 @@ llm = get_llm() -def extractBodyFromSchema( +def gen_body_from_schema( body_schema: JsonData, text: str, prev_api_response: str, example: str ) -> Any: _DEFAULT_TEMPLATE = """To enable a substantially intelligent language model to execute a series of APIs sequentially, the following essential details are necessary to gather information needed for the next API call: diff --git a/llm-server/routes/workflow/extractors/extract_param.py b/llm-server/routes/workflow/extractors/extract_param.py index 
0e464f3e9..637894829 100644 --- a/llm-server/routes/workflow/extractors/extract_param.py +++ b/llm-server/routes/workflow/extractors/extract_param.py @@ -10,7 +10,7 @@ llm = get_llm() -def extractParamsFromSchema( +def gen_params_from_schema( param_schema: JsonData, text: str, prev_resp: str ) -> Optional[JsonData]: """Extracts API parameters from a schema based on user text and previous response. diff --git a/llm-server/routes/workflow/generate_openapi_payload.py b/llm-server/routes/workflow/generate_openapi_payload.py index 6ed9f85cd..74e7e2f71 100644 --- a/llm-server/routes/workflow/generate_openapi_payload.py +++ b/llm-server/routes/workflow/generate_openapi_payload.py @@ -4,16 +4,15 @@ from langchain.tools.json.tool import JsonSpec from utils.get_llm import get_llm from dotenv import load_dotenv -from routes.workflow.extractors.extract_body import extractBodyFromSchema -from routes.workflow.extractors.extract_param import extractParamsFromSchema -from routes.workflow.extractors.hydrate_params import ( - hydrateParams, - replace_ref_with_value, -) +from .extractors.example_generator import gen_ex_from_schema +from routes.workflow.extractors.extract_param import gen_params_from_schema +from routes.workflow.extractors.extract_body import gen_body_from_schema from custom_types.t_json import JsonData from custom_types.swagger import ApiOperation from typing import Dict, Any, Optional, Union, Tuple -from .extractors.example_generator import generate_example_from_schema +from routes.workflow.load_openapi_spec import load_openapi_spec + +from prance import ResolvingParser load_dotenv() @@ -24,123 +23,58 @@ from typing import Dict, Any, Optional, Union, List -def get_api_operation_by_id( - json_spec: Any, op_id: str -) -> Tuple[ApiOperation, str, str]: - """ - Get an API operation by its operationId from a OpenAPI/Swagger specification. - - Args: - json_spec: The OpenAPI/Swagger specification as a pydantic model. - op_id: The operationId to search for. - - Returns: - A tuple containing the ApiOperation definition, HTTP method, and path - for the matching operation. - - Raises: - ValueError: If no operation with the given op_id is found. - """ - paths: Dict[str, List[ApiOperation]] = json_spec.dict_.get("paths", {}) - - for path, methods in paths.items(): - if isinstance(methods, dict): - for method, operation in methods.items(): - # Check if 'operation' is a dictionary - if isinstance(operation, dict): - operation_id: Union[str, None] = operation.get("operationId") - - if operation_id == op_id: - return operation, method, path - - else: - # Handle invalid operation - pass - - raise ValueError(f"Failed to find operation with id {op_id} in spec {json_spec}") - - -def resolve_refs(input_dict: JsonData, json_spec: Dict[str, Any]) -> Any: - """ - Recursively resolves JSON reference ($ref) fields in a dictionary/list structure. - - Args: - input_dict: The dictionary or list to resolve references in. - json_spec: The full JSON specification containing reference definitions. - - Returns: - input_dict with any $ref fields resolved to their referenced value. 
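    A sketch of the transformation this removed helper performed, using a
    hypothetical two-segment pointer (the implementation split the "$ref"
    path and looked up its first two segments in the spec):

        node = {"$ref": "#/definitions/Pet"}
        spec = {"definitions": {"Pet": {"type": "object"}}}

        # Resolution replaces the pointer with the schema it names:
        resolve_refs(node, spec)  # -> {"type": "object"}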
- - """ - # Check if the input_dict is a dictionary and contains a '$ref' key - if isinstance(input_dict, dict) and "$ref" in input_dict: - ref_value = input_dict["$ref"] - paths = ref_value.split("/")[1:3] - if paths[0] in json_spec and paths[1] in json_spec[paths[0]]: - return json_spec[paths[0]][paths[1]] - - # Recursively process nested dictionaries and lists - if isinstance(input_dict, dict): - for key, value in input_dict.items(): - input_dict[key] = resolve_refs(value, json_spec) - elif isinstance(input_dict, list): - for i, item in enumerate(input_dict): - input_dict[i] = resolve_refs(item, json_spec) - - return input_dict - - -def resolve_request_body_schema_reference( - method: str, api_operation: ApiOperation, json_spec: Any -) -> Any: - """ - Resolves any JSON schema $ref pointers in the requestBody - of the given API operation against the given API spec. - - Args: - request_method: The HTTP method of the API operation - - api_operation: A dictionary containing a snippet of the API specification - in OpenAPI/Swagger format, describing a single operation of the API. - For example, this could be the schema for the request body of the - "addPet" operation. - - api_spec: The full API specification dictionary containing - the complete OpenAPI/Swagger schema. - - Returns: - The updated api_operation dictionary with any JSON reference pointers - resolved against the api_spec. - """ - content_type = "application/json" - requestBody = api_operation.get("requestBody") - - # Check if requestBody is None (i.e., it doesn't exist) - if requestBody is None: - return api_operation - - if not isinstance(requestBody, dict): - return api_operation - - content_types = requestBody.get("content", {}) - - # Check if the specified content type exists in the requestBody - if content_type in content_types: - content_type_schema = content_types[content_type].get("schema") - - # Check if the content type schema is a reference - if content_type_schema and "$ref" in content_type_schema: - ref_path = content_type_schema["$ref"].split("/")[1:] - - # Navigate through the JSON spec using the reference path - schema_node = json_spec.dict_ - for path_element in ref_path: - schema_node = schema_node.get(path_element, {}) - - # Update the content type schema with the resolved schema - content_types[content_type]["schema"] = schema_node +# get path param, query param and json body schema for a given operation id +def get_api_info_by_operation_id( + data: Dict[str, Dict[str, dict]], target_operation_id: str +) -> Dict[str, Union[str, dict, Optional[Dict[str, dict]]]]: + api_info = { + "endpoint": None, + "method": None, + "path_params": {}, + "query_params": {}, + "body_schema": None, + } - return api_operation + for path, methods in data["paths"].items(): + for method, details in methods.items(): + if ( + "operationId" in details + and details["operationId"] == target_operation_id + ): + # Extract endpoint and method + api_info["endpoint"] = path + api_info["method"] = method.upper() + + # Extract path parameters and their schemas + path_params = {} + for parameter in details.get("parameters", []): + if parameter["in"] == "path": + param_name = parameter["name"] + param_schema = parameter.get("schema", {}) + path_params[param_name] = param_schema + api_info["path_params"] = path_params + + # Extract query parameters and their schemas + query_params = {} + for parameter in details.get("parameters", []): + if parameter["in"] == "query": + param_name = parameter["name"] + param_schema = parameter.get("schema", {}) + 
query_params[param_name] = param_schema + api_info["query_params"] = query_params + + # Extract request body schema + if "requestBody" in details: + request_body = details["requestBody"] + if ( + "content" in request_body + and "application/json" in request_body["content"] + ): + api_info["body_schema"] = request_body["content"][ + "application/json" + ]["schema"] + + return api_info def extract_json_payload(input_string: str) -> Optional[Any]: @@ -154,73 +88,28 @@ def extract_json_payload(input_string: str) -> Optional[Any]: def generate_openapi_payload( swagger_text: str, text: str, _operation_id: str, prev_api_response: str ) -> Dict[str, Any]: - """Generates an API request payload based on an OpenAPI spec. - Args: - spec_source (str): The path or URL to the OpenAPI spec file. - text (str): The original user text query. - _operation_id (str): The ID of the OpenAPI operation to target. - prev_api_response (str): The response from a previous API request, if any. - - Returns: - Dict[str, Any]: The generated request payload, containing keys for - "body", "params", "path", and "request_type". - - This function parses the given OpenAPI spec and constructs a request payload - for the operation matching the provided _operation_id. It extracts parameters - from the user text and previous API response to populate the payload. The - payload can then be used to call the target API. - """ - params: Optional[JsonData] = {} - body: Optional[Dict[str, Any]] = {} - spec_dict: Dict[str, Any] = json.loads(swagger_text) - # extracted_feature = extract_feature_from_user_query(text) - - # Continue with the rest of the code - json_spec: JsonSpec = JsonSpec(dict_=spec_dict, max_value_length=4000) - - api_operation: ApiOperation - method: str - path: str - api_operation, method, path = get_api_operation_by_id(json_spec, _operation_id) - - isolated_request: Dict[str, Any] = resolve_request_body_schema_reference( - method, api_operation, json_spec + parser = ResolvingParser(spec_source) + (a, b, c) = parser.version_parsed # (3,0,2), we can then apply transformation on + # add transformation for swagger v2 + + api_info = get_api_info_by_operation_id(parser.specification, _operation_id) + + path_params = gen_params_from_schema( + api_info["path_params"], text, prev_api_response + ) + query_params = gen_params_from_schema( + api_info["query_params"], text, prev_api_response ) - if isolated_request and "parameters" in isolated_request: - isolated_request["parameters"] = hydrateParams( - json_spec.dict_, isolated_request["parameters"] - ) - params = extractParamsFromSchema( - isolated_request["parameters"], text, prev_api_response - ) - - if ( - "requestBody" in api_operation - and "content" in api_operation["requestBody"] - and "application/json" in api_operation["requestBody"]["content"] - and "schema" in api_operation["requestBody"]["content"]["application/json"] - and "properties" - in api_operation["requestBody"]["content"]["application/json"]["schema"] - ): - body_schema: Dict[str, Any] = api_operation["requestBody"]["content"][ - "application/json" - ]["schema"]["properties"] - - # replace $ref recursively - replace_ref_with_value(body_schema, json_spec.dict_) - example = generate_example_from_schema(api_operation) - - print(f"Generator function output {example}") - body = extractBodyFromSchema(body_schema, text, prev_api_response, example) - else: - print("Some key is not present in the requestBody dictionary.") - - response = { - "body": body, - "params": params, - "path": path, - "request_type": method, - } 
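# For reference, the new helper above behaves as exercised in the
# swagger_examples notebook against the resolved petstore spec:
#
#   get_api_info_by_operation_id(parser.specification, 'getPetById')
#   # -> {'endpoint': '/pet/{petId}', 'method': 'GET',
#   #     'path_params': {'petId': {'type': 'integer', 'format': 'int64'}},
#   #     'query_params': {}, 'body_schema': None}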
+ example = gen_ex_from_schema(api_info["body_schema"]) + body_schema = gen_body_from_schema( + api_info["body_schema"], text, prev_api_response, example + ) - return response + return { + "endpoint": api_info["endpoint"], + "method": api_info["method"], + "path_params": path_params, + "query_params": query_params, + "body_schema": body_schema, + } diff --git a/llm-server/routes/workflow/workflow_service.py b/llm-server/routes/workflow/workflow_service.py index 352c6e2bd..47819574c 100644 --- a/llm-server/routes/workflow/workflow_service.py +++ b/llm-server/routes/workflow/workflow_service.py @@ -99,7 +99,8 @@ def run_openapi_operations( request_type=api_payload["request_type"], url=api_payload["path"], body=api_payload["body"], - params=api_payload["params"], + params=api_payload["path_params"], + query_params=api_payload["query_params"] headers=headers, ) record_info[operation_id] = json.loads(api_response.text) diff --git a/llm-server/utils/make_api_call.py b/llm-server/utils/make_api_call.py index 7207f01e9..782dc0704 100644 --- a/llm-server/utils/make_api_call.py +++ b/llm-server/utils/make_api_call.py @@ -25,11 +25,11 @@ def replace_url_placeholders(url: str, values_dict: Dict[str, Any]) -> str: def make_api_request( - request_type: str, - url: str, - body: Dict[str, Any] = {}, - params: Dict[str, Any] = {}, - headers: Dict[str, Any] = {}, + endpoint: str, + method: str, + path_params: Dict[str, Any] = {}, + query_params: Dict[str, Any] = {}, + body_schema: Dict[str, Any] = {}, ) -> Response: url = replace_url_placeholders(url, params) # Create a session and configure it with headers From 092776b711cff989113a671056a09c8787b7e532 Mon Sep 17 00:00:00 2001 From: codebanesr Date: Tue, 26 Sep 2023 23:02:00 +0300 Subject: [PATCH 03/14] Swagger parser additions --- llm-server/notebooks/swagger_examples.ipynb | 131 ++++++------------ .../workflow/generate_openapi_payload.py | 14 +- .../routes/workflow/workflow_service.py | 11 +- llm-server/utils/make_api_call.py | 36 ++--- 4 files changed, 78 insertions(+), 114 deletions(-) diff --git a/llm-server/notebooks/swagger_examples.ipynb b/llm-server/notebooks/swagger_examples.ipynb index 3cfe3e436..aa0490fce 100644 --- a/llm-server/notebooks/swagger_examples.ipynb +++ b/llm-server/notebooks/swagger_examples.ipynb @@ -10,34 +10,17 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 56, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: prance in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (23.6.21.0)\n", - "Requirement already satisfied: chardet>=3.0 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from prance) (4.0.0)\n", - "Requirement already satisfied: ruamel.yaml>=0.17.10 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from prance) (0.17.21)\n", - "Requirement already satisfied: requests>=2.25 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from prance) (2.31.0)\n", - "Requirement already satisfied: six~=1.15 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from prance) (1.16.0)\n", - "Requirement already satisfied: packaging>=21.3 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from prance) (23.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from requests>=2.25->prance) (3.2.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in 
/Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from requests>=2.25->prance) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from requests>=2.25->prance) (1.26.16)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from requests>=2.25->prance) (2023.7.22)\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ - "pip install prance" + "# pip install prance\n", + "# pip install openapi-spec-validator" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ @@ -46,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 58, "metadata": {}, "outputs": [], "source": [ @@ -55,42 +38,7 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: openapi-spec-validator in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (0.6.0)\n", - "Requirement already satisfied: jsonschema<5.0.0,>=4.18.0 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from openapi-spec-validator) (4.19.1)\n", - "Requirement already satisfied: jsonschema-spec<0.3.0,>=0.2.3 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from openapi-spec-validator) (0.2.4)\n", - "Requirement already satisfied: lazy-object-proxy<2.0.0,>=1.7.1 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from openapi-spec-validator) (1.9.0)\n", - "Requirement already satisfied: openapi-schema-validator<0.7.0,>=0.6.0 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from openapi-spec-validator) (0.6.1)\n", - "Requirement already satisfied: attrs>=22.2.0 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from jsonschema<5.0.0,>=4.18.0->openapi-spec-validator) (23.1.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from jsonschema<5.0.0,>=4.18.0->openapi-spec-validator) (2023.7.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from jsonschema<5.0.0,>=4.18.0->openapi-spec-validator) (0.30.2)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from jsonschema<5.0.0,>=4.18.0->openapi-spec-validator) (0.10.2)\n", - "Requirement already satisfied: PyYAML>=5.1 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from jsonschema-spec<0.3.0,>=0.2.3->openapi-spec-validator) (6.0)\n", - "Requirement already satisfied: pathable<0.5.0,>=0.4.1 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from jsonschema-spec<0.3.0,>=0.2.3->openapi-spec-validator) (0.4.3)\n", - "Requirement already satisfied: requests<3.0.0,>=2.31.0 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from jsonschema-spec<0.3.0,>=0.2.3->openapi-spec-validator) (2.31.0)\n", - "Requirement already satisfied: rfc3339-validator in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from openapi-schema-validator<0.7.0,>=0.6.0->openapi-spec-validator) (0.1.4)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from 
requests<3.0.0,>=2.31.0->jsonschema-spec<0.3.0,>=0.2.3->openapi-spec-validator) (3.2.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from requests<3.0.0,>=2.31.0->jsonschema-spec<0.3.0,>=0.2.3->openapi-spec-validator) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from requests<3.0.0,>=2.31.0->jsonschema-spec<0.3.0,>=0.2.3->openapi-spec-validator) (1.26.16)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from requests<3.0.0,>=2.31.0->jsonschema-spec<0.3.0,>=0.2.3->openapi-spec-validator) (2023.7.22)\n", - "Requirement already satisfied: six in /Users/shanurrahman/anaconda3/lib/python3.11/site-packages (from rfc3339-validator->openapi-schema-validator<0.7.0,>=0.6.0->openapi-spec-validator) (1.16.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], - "source": [ - "pip install openapi-spec-validator" - ] - }, - { - "cell_type": "code", - "execution_count": 6, + "execution_count": 59, "metadata": {}, "outputs": [], "source": [ @@ -99,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 60, "metadata": {}, "outputs": [ { @@ -108,7 +56,7 @@ "'3.0.2'" ] }, - "execution_count": 20, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } @@ -119,27 +67,27 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[{'url': '/api/v3'}]" + "'/api/v3'" ] }, - "execution_count": 30, + "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "parser.specification[\"servers\"]" + "parser.specification[\"servers\"][0]['url']" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 62, "metadata": {}, "outputs": [], "source": [ @@ -148,19 +96,20 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 63, "metadata": {}, "outputs": [], "source": [ - "from typing import Dict, Union, Optional\n", + "from typing import Dict, Union, Optional, List\n", "\n", - "def get_api_info_by_operation_id(data: Dict[str, Dict[str, dict]], target_operation_id: str) -> Dict[str, Union[str, dict, Optional[Dict[str, dict]]]]:\n", + "def get_api_info_by_operation_id(data: Dict[str, Dict[str, dict]], target_operation_id: str) -> Dict[str, Union[str, dict, Optional[Dict[str, dict]], List[str]]]:\n", " api_info = {\n", " \"endpoint\": None,\n", " \"method\": None,\n", " \"path_params\": {},\n", " \"query_params\": {},\n", - " \"body_schema\": None\n", + " \"body_schema\": None,\n", + " \"servers\": []\n", " }\n", "\n", " for path, methods in data[\"paths\"].items():\n", @@ -194,12 +143,17 @@ " if \"content\" in request_body and \"application/json\" in request_body[\"content\"]:\n", " api_info[\"body_schema\"] = request_body[\"content\"][\"application/json\"][\"schema\"]\n", "\n", + " # Extract server URLs\n", + " servers = data.get(\"servers\", [])\n", + " server_urls = [server[\"url\"] for server in servers]\n", + " api_info[\"servers\"] = server_urls\n", + "\n", " return api_info" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 64, "metadata": {}, "outputs": [ { @@ -211,10 +165,11 @@ " 'query_params': {'status': {'type': 'string',\n", " 'default': 'available',\n", " 'enum': ['available', 'pending', 'sold']}},\n", - " 'body_schema': None}" + " 
'body_schema': None,\n", + " 'servers': ['/api/v3']}" ] }, - "execution_count": 27, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } @@ -225,7 +180,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 65, "metadata": {}, "outputs": [ { @@ -235,10 +190,11 @@ " 'method': 'GET',\n", " 'path_params': {'petId': {'type': 'integer', 'format': 'int64'}},\n", " 'query_params': {},\n", - " 'body_schema': None}" + " 'body_schema': None,\n", + " 'servers': ['/api/v3']}" ] }, - "execution_count": 25, + "execution_count": 65, "metadata": {}, "output_type": "execute_result" } @@ -249,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 66, "metadata": {}, "outputs": [ { @@ -259,10 +215,11 @@ " 'method': 'POST',\n", " 'path_params': {'petId': {'type': 'integer', 'format': 'int64'}},\n", " 'query_params': {'name': {'type': 'string'}, 'status': {'type': 'string'}},\n", - " 'body_schema': None}" + " 'body_schema': None,\n", + " 'servers': ['/api/v3']}" ] }, - "execution_count": 28, + "execution_count": 66, "metadata": {}, "output_type": "execute_result" } @@ -273,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 68, "metadata": {}, "outputs": [ { @@ -295,24 +252,18 @@ " 'description': 'User Status',\n", " 'format': 'int32',\n", " 'example': 1}},\n", - " 'xml': {'name': 'user'}}}" + " 'xml': {'name': 'user'}},\n", + " 'servers': ['/api/v3']}" ] }, - "execution_count": 14, + "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "get_api_info_by_operation_id(json.loads(t), 'updateUser')" + "get_api_info_by_operation_id(parser.specification, 'updateUser')" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/llm-server/routes/workflow/generate_openapi_payload.py b/llm-server/routes/workflow/generate_openapi_payload.py index 74e7e2f71..81f6284a2 100644 --- a/llm-server/routes/workflow/generate_openapi_payload.py +++ b/llm-server/routes/workflow/generate_openapi_payload.py @@ -13,6 +13,7 @@ from routes.workflow.load_openapi_spec import load_openapi_spec from prance import ResolvingParser +from prance import convert load_dotenv() @@ -24,15 +25,16 @@ # get path param, query param and json body schema for a given operation id -def get_api_info_by_operation_id( - data: Dict[str, Dict[str, dict]], target_operation_id: str -) -> Dict[str, Union[str, dict, Optional[Dict[str, dict]]]]: +from typing import Dict, Union, Optional, List + +def get_api_info_by_operation_id(data: Dict[str, Dict[str, dict]], target_operation_id: str) -> Dict[str, Union[str, dict, Optional[Dict[str, dict]], List[str]]]: api_info = { "endpoint": None, "method": None, "path_params": {}, "query_params": {}, "body_schema": None, + "servers": [] } for path, methods in data["paths"].items(): @@ -74,6 +76,11 @@ def get_api_info_by_operation_id( "application/json" ]["schema"] + # Extract server URLs + servers = data.get("servers", []) + server_urls = [server["url"] for server in servers] + api_info["servers"] = server_urls + return api_info @@ -112,4 +119,5 @@ def generate_openapi_payload( "path_params": path_params, "query_params": query_params, "body_schema": body_schema, + "servers": api_info["servers"] } diff --git a/llm-server/routes/workflow/workflow_service.py b/llm-server/routes/workflow/workflow_service.py index 47819574c..70f812241 100644 --- a/llm-server/routes/workflow/workflow_service.py +++ 
b/llm-server/routes/workflow/workflow_service.py @@ -96,12 +96,13 @@ def run_openapi_operations( api_payload["path"] = get_valid_url(api_payload, server_base_url) api_response = make_api_request( - request_type=api_payload["request_type"], - url=api_payload["path"], - body=api_payload["body"], - params=api_payload["path_params"], - query_params=api_payload["query_params"] + method=api_payload["method"], + endpoint=api_payload["endpoint"], + body_schema=api_payload["body_schema"], + path_params=api_payload["path_params"], + query_params=api_payload["query_params"], headers=headers, + servers=api_payload["servers"] ) record_info[operation_id] = json.loads(api_response.text) prev_api_response = api_response.text diff --git a/llm-server/utils/make_api_call.py b/llm-server/utils/make_api_call.py index 782dc0704..39e5eed57 100644 --- a/llm-server/utils/make_api_call.py +++ b/llm-server/utils/make_api_call.py @@ -25,13 +25,16 @@ def replace_url_placeholders(url: str, values_dict: Dict[str, Any]) -> str: def make_api_request( - endpoint: str, - method: str, - path_params: Dict[str, Any] = {}, - query_params: Dict[str, Any] = {}, - body_schema: Dict[str, Any] = {}, + method, + endpoint, + body_schema, + path_params, + query_params, + headers, + servers, ) -> Response: - url = replace_url_placeholders(url, params) + endpoint = replace_url_placeholders(endpoint, path_params) + url = servers[0].url +"/"+ endpoint # Create a session and configure it with headers session = requests.Session() @@ -42,14 +45,14 @@ def make_api_request( session.headers.update(headers) try: # Perform the HTTP request based on the request type - if request_type.upper() == "GET": - response = session.get(url, params=params) - elif request_type.upper() == "POST": - response = session.post(url, json=body, params=params) - elif request_type.upper() == "PUT": - response = session.put(url, json=body, params=params) - elif request_type.upper() == "DELETE": - response = session.delete(url, params=params) + if method == "GET": + response = session.get(url, params=query_params) + elif method == "POST": + response = session.post(url, json=body_schema, params=query_params) + elif method == "PUT": + response = session.put(url, json=body_schema, params=query_params) + elif method == "DELETE": + response = session.delete(url, params=query_params) else: raise ValueError("Invalid request type. 
Use GET, POST, PUT, or DELETE.") @@ -65,8 +68,9 @@ def make_api_request( extra={ "headers": headers, "url": url, - "params": params, - "request_type": request_type, + "params": path_params, + "query_params": query_params, + "method": method, }, ) raise (e) From f33082ac754145fbd32c680ad25bbbf8765da544 Mon Sep 17 00:00:00 2001 From: codebanesr Date: Tue, 26 Sep 2023 23:37:22 +0300 Subject: [PATCH 04/14] prepending /app/shared to get absolute file url --- llm-server/routes/workflow/generate_openapi_payload.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/llm-server/routes/workflow/generate_openapi_payload.py b/llm-server/routes/workflow/generate_openapi_payload.py index 81f6284a2..2933f686c 100644 --- a/llm-server/routes/workflow/generate_openapi_payload.py +++ b/llm-server/routes/workflow/generate_openapi_payload.py @@ -95,8 +95,13 @@ def extract_json_payload(input_string: str) -> Optional[Any]: def generate_openapi_payload( swagger_text: str, text: str, _operation_id: str, prev_api_response: str ) -> Dict[str, Any]: + if isinstance(spec_source, str): + if spec_source.startswith(("http://", "https://")): + spec_source = "/app/shared"+spec_source + parser = ResolvingParser(spec_source) - (a, b, c) = parser.version_parsed # (3,0,2), we can then apply transformation on + (a,b,c) = parser.version_parsed # (3,0,2), we can then apply transformation on + print(a,b,c) # add transformation for swagger v2 api_info = get_api_info_by_operation_id(parser.specification, _operation_id) From f971f070cab9b39e4c07ac44ce46b5c1adedf843 Mon Sep 17 00:00:00 2001 From: codebanesr Date: Thu, 28 Sep 2023 04:58:34 +0300 Subject: [PATCH 05/14] added some conditional logic to short circuit body generation for get calls, and prevent generation of query and route params if not defined in schema. 
This significantly reduces the number of operations and time required for them --- llm-server/app.py | 1 + .../workflow/generate_openapi_payload.py | 40 ++++++++++++------- .../routes/workflow/workflow_service.py | 14 +++---- llm-server/utils/make_api_call.py | 2 +- .../utils/vector_db/get_vector_store.py | 4 +- .../utils/vector_db/init_vector_store.py | 1 + 6 files changed, 38 insertions(+), 24 deletions(-) diff --git a/llm-server/app.py b/llm-server/app.py index 45fe6937a..1f81154c5 100644 --- a/llm-server/app.py +++ b/llm-server/app.py @@ -15,6 +15,7 @@ from routes.root_service import handle_request + ## TODO: Implement caching for the swagger file content (no need to load it everytime) @app.route("/handle", methods=["POST", "OPTIONS"]) def handle() -> Response: diff --git a/llm-server/routes/workflow/generate_openapi_payload.py b/llm-server/routes/workflow/generate_openapi_payload.py index 2933f686c..8e76ac519 100644 --- a/llm-server/routes/workflow/generate_openapi_payload.py +++ b/llm-server/routes/workflow/generate_openapi_payload.py @@ -27,14 +27,17 @@ # get path param, query param and json body schema for a given operation id from typing import Dict, Union, Optional, List -def get_api_info_by_operation_id(data: Dict[str, Dict[str, dict]], target_operation_id: str) -> Dict[str, Union[str, dict, Optional[Dict[str, dict]], List[str]]]: + +def get_api_info_by_operation_id( + data: Dict[str, Dict[str, dict]], target_operation_id: str +) -> Dict[str, Union[str, dict, Optional[Dict[str, dict]], List[str]]]: api_info = { "endpoint": None, "method": None, "path_params": {}, "query_params": {}, "body_schema": None, - "servers": [] + "servers": [], } for path, methods in data["paths"].items(): @@ -97,26 +100,33 @@ def generate_openapi_payload( ) -> Dict[str, Any]: if isinstance(spec_source, str): if spec_source.startswith(("http://", "https://")): - spec_source = "/app/shared"+spec_source - + spec_source = "/app/shared" + spec_source + parser = ResolvingParser(spec_source) - (a,b,c) = parser.version_parsed # (3,0,2), we can then apply transformation on - print(a,b,c) + (a, b, c) = parser.version_parsed # (3,0,2), we can then apply transformation on + print(a, b, c) # add transformation for swagger v2 api_info = get_api_info_by_operation_id(parser.specification, _operation_id) - path_params = gen_params_from_schema( - api_info["path_params"], text, prev_api_response + path_params = ( + {} + if not api_info["path_params"] + else gen_params_from_schema(api_info["path_params"], text, prev_api_response) ) - query_params = gen_params_from_schema( - api_info["query_params"], text, prev_api_response + query_params = ( + {} + if not api_info["query_params"] + else gen_params_from_schema(api_info["query_params"], text, prev_api_response) ) - example = gen_ex_from_schema(api_info["body_schema"]) - body_schema = gen_body_from_schema( - api_info["body_schema"], text, prev_api_response, example - ) + if api_info["body_schema"]: + example = gen_ex_from_schema(api_info["body_schema"]) + body_schema = gen_body_from_schema( + api_info["body_schema"], text, prev_api_response, example + ) + else: + body_schema = {} return { "endpoint": api_info["endpoint"], @@ -124,5 +134,5 @@ def generate_openapi_payload( "path_params": path_params, "query_params": query_params, "body_schema": body_schema, - "servers": api_info["servers"] + "servers": api_info["servers"], } diff --git a/llm-server/routes/workflow/workflow_service.py b/llm-server/routes/workflow/workflow_service.py index 70f812241..f846cd657 100644 --- 
a/llm-server/routes/workflow/workflow_service.py +++ b/llm-server/routes/workflow/workflow_service.py @@ -23,15 +23,15 @@ def get_valid_url( api_payload: Dict[str, Union[str, None]], server_base_url: Optional[str] ) -> str: - if "path" in api_payload: - path = api_payload["path"] + if "endpoint" in api_payload: + endpoint = api_payload["endpoint"] # Check if path is a valid URL - if path and path.startswith(("http://", "https://")): - return path + if endpoint and endpoint.startswith(("http://", "https://")): + return endpoint elif server_base_url and server_base_url.startswith(("http://", "https://")): - # Append server_base_url to path - return f"{server_base_url}{path}" + # Append server_base_url to endpoint + return f"{server_base_url}{endpoint}" else: raise ValueError("Invalid server_base_url") else: @@ -102,7 +102,7 @@ def run_openapi_operations( path_params=api_payload["path_params"], query_params=api_payload["query_params"], headers=headers, - servers=api_payload["servers"] + servers=api_payload["servers"], ) record_info[operation_id] = json.loads(api_response.text) prev_api_response = api_response.text diff --git a/llm-server/utils/make_api_call.py b/llm-server/utils/make_api_call.py index 39e5eed57..5b620a140 100644 --- a/llm-server/utils/make_api_call.py +++ b/llm-server/utils/make_api_call.py @@ -34,7 +34,7 @@ def make_api_request( servers, ) -> Response: endpoint = replace_url_placeholders(endpoint, path_params) - url = servers[0].url +"/"+ endpoint + url = servers[0] + endpoint # Create a session and configure it with headers session = requests.Session() diff --git a/llm-server/utils/vector_db/get_vector_store.py b/llm-server/utils/vector_db/get_vector_store.py index 5c02cd899..c4c64e486 100644 --- a/llm-server/utils/vector_db/get_vector_store.py +++ b/llm-server/utils/vector_db/get_vector_store.py @@ -28,7 +28,9 @@ def get_vector_store(options: StoreOptions) -> VectorStore: ) elif store_type == StoreType.QDRANT.value: client = qdrant_client.QdrantClient( - url=os.environ["QDRANT_URL"], prefer_grpc=True + url=os.environ["QDRANT_URL"], + prefer_grpc=True, + api_key=os.getenv("QDRANT_API_KEY", ""), ) vector_store = Qdrant( diff --git a/llm-server/utils/vector_db/init_vector_store.py b/llm-server/utils/vector_db/init_vector_store.py index e8ea20c8f..e50e52165 100644 --- a/llm-server/utils/vector_db/init_vector_store.py +++ b/llm-server/utils/vector_db/init_vector_store.py @@ -56,6 +56,7 @@ def init_vector_store( embeddings, collection_name=options.namespace, url=os.environ["QDRANT_URL"], + api_key=os.getenv("QDRANT_API_KEY", ""), ) else: From ab8c4a9f79f686c92baf96be2063d6b5620a221b Mon Sep 17 00:00:00 2001 From: codebanesr Date: Sat, 30 Sep 2023 01:20:43 +0300 Subject: [PATCH 06/14] removing some bugs --- llm-server/app.py | 5 +- llm-server/routes/_swagger/service.py | 4 +- llm-server/routes/root_service.py | 4 +- llm-server/routes/workflow/api_info.py | 19 ++++ .../workflow/generate_openapi_payload.py | 91 ++++++++----------- .../workflow/typings/run_workflow_input.py | 2 - .../routes/workflow/workflow_controller.py | 29 +++--- .../routes/workflow/workflow_schema.json | 22 ++++- .../routes/workflow/workflow_service.py | 15 +-- 9 files changed, 101 insertions(+), 90 deletions(-) create mode 100644 llm-server/routes/workflow/api_info.py diff --git a/llm-server/app.py b/llm-server/app.py index 1f81154c5..596366428 100644 --- a/llm-server/app.py +++ b/llm-server/app.py @@ -15,16 +15,15 @@ from routes.root_service import handle_request - ## TODO: Implement caching for the swagger 
file content (no need to load it everytime) @app.route("/handle", methods=["POST", "OPTIONS"]) def handle() -> Response: data = request.get_json() try: response = handle_request(data) - return jsonify(response), 200 + return jsonify(response) except Exception as e: - return jsonify({"response": str(e)}), 500 + return jsonify({"response": str(e)}) @app.errorhandler(500) diff --git a/llm-server/routes/_swagger/service.py index 1165818de..d720e4ee7 100644 --- a/llm-server/routes/_swagger/service.py +++ b/llm-server/routes/_swagger/service.py @@ -39,6 +39,6 @@ def add_swagger_file(request: Request, id: str) -> Dict[str, str]: # Insert into MongoDB file_content["bot_id"] = id - mongo.swagger_files.insert_one(file_content) + inserted_id = mongo.swagger_files.insert_one(file_content).inserted_id - return {"message": "File added successfully"} + return {"message": "File added successfully", "id": str(inserted_id)} diff --git a/llm-server/routes/root_service.py index 596b6592d..b96eb12d0 100644 --- a/llm-server/routes/root_service.py +++ b/llm-server/routes/root_service.py @@ -75,9 +75,7 @@ def handle_request(data: Dict[str, Any]) -> Any: try: if hasMultipleIntents(text): - return run_workflow( - WorkflowData(text, swagger_text, headers, server_base_url) - ) + return run_workflow(WorkflowData(text, headers, server_base_url)) except Exception as e: print(e) diff --git a/llm-server/routes/workflow/api_info.py new file mode 100644 index 000000000..1e2e5d4d2 --- /dev/null +++ b/llm-server/routes/workflow/api_info.py @@ -0,0 +1,19 @@ +from typing import Dict, Union, Optional, List, Any + + +class ApiInfo: + def __init__( + self, + endpoint: Optional[str], + method: Optional[str], + path_params: Any, + query_params: Any, + body_schema: Any, + servers: List[str], + ) -> None: + self.endpoint = endpoint + self.method = method + self.path_params = path_params + self.query_params = query_params + self.body_schema = body_schema + self.servers = servers diff --git a/llm-server/routes/workflow/generate_openapi_payload.py index 8e76ac519..901f5b57b 100644 --- a/llm-server/routes/workflow/generate_openapi_payload.py +++ b/llm-server/routes/workflow/generate_openapi_payload.py @@ -1,19 +1,14 @@ import re import os import json -from langchain.tools.json.tool import JsonSpec from utils.get_llm import get_llm from dotenv import load_dotenv from .extractors.example_generator import gen_ex_from_schema from routes.workflow.extractors.extract_param import gen_params_from_schema from routes.workflow.extractors.extract_body import gen_body_from_schema -from custom_types.t_json import JsonData -from custom_types.swagger import ApiOperation -from typing import Dict, Any, Optional, Union, Tuple -from routes.workflow.load_openapi_spec import load_openapi_spec from prance import ResolvingParser -from prance import convert load_dotenv() @@ -21,24 +16,23 @@ llm = get_llm() -from typing import Dict, Any, Optional, Union, List +from typing import Dict, Any, Optional, List # get path param, query param and json body schema for a given operation id -from typing import Dict, Union, Optional, List - - -def get_api_info_by_operation_id( - data: Dict[str, Dict[str, dict]], target_operation_id: str -) -> Dict[str, Union[str, dict, Optional[Dict[str, dict]], List[str]]]: - api_info = { - "endpoint": None, - "method": None, -
"path_params": {}, - "query_params": {}, - "body_schema": None, - "servers": [], - } +from typing import Dict, Optional, List +from routes.workflow.api_info import ApiInfo + + +def get_api_info_by_operation_id(data: Any, target_operation_id: str) -> ApiInfo: + api_info = ApiInfo( + endpoint=None, + method=None, + path_params={}, + query_params={}, + body_schema=None, + servers=[], + ) for path, methods in data["paths"].items(): for method, details in methods.items(): @@ -47,8 +41,8 @@ def get_api_info_by_operation_id( and details["operationId"] == target_operation_id ): # Extract endpoint and method - api_info["endpoint"] = path - api_info["method"] = method.upper() + api_info.endpoint = path + api_info.method = method.upper() # Extract path parameters and their schemas path_params = {} @@ -57,7 +51,7 @@ def get_api_info_by_operation_id( param_name = parameter["name"] param_schema = parameter.get("schema", {}) path_params[param_name] = param_schema - api_info["path_params"] = path_params + api_info.path_params = path_params # Extract query parameters and their schemas query_params = {} @@ -66,7 +60,7 @@ def get_api_info_by_operation_id( param_name = parameter["name"] param_schema = parameter.get("schema", {}) query_params[param_name] = param_schema - api_info["query_params"] = query_params + api_info.query_params = query_params # Extract request body schema if "requestBody" in details: @@ -75,16 +69,16 @@ def get_api_info_by_operation_id( "content" in request_body and "application/json" in request_body["content"] ): - api_info["body_schema"] = request_body["content"][ + api_info.body_schema = request_body["content"][ "application/json" ]["schema"] # Extract server URLs servers = data.get("servers", []) server_urls = [server["url"] for server in servers] - api_info["servers"] = server_urls + api_info.servers = server_urls - return api_info + return api_info def extract_json_payload(input_string: str) -> Optional[Any]: @@ -97,42 +91,31 @@ def extract_json_payload(input_string: str) -> Optional[Any]: def generate_openapi_payload( swagger_text: str, text: str, _operation_id: str, prev_api_response: str -) -> Dict[str, Any]: - if isinstance(spec_source, str): - if spec_source.startswith(("http://", "https://")): - spec_source = "/app/shared" + spec_source - - parser = ResolvingParser(spec_source) +) -> ApiInfo: + parser = ResolvingParser(spec_string=swagger_text) (a, b, c) = parser.version_parsed # (3,0,2), we can then apply transformation on print(a, b, c) # add transformation for swagger v2 api_info = get_api_info_by_operation_id(parser.specification, _operation_id) - path_params = ( + api_info.path_params = ( {} - if not api_info["path_params"] - else gen_params_from_schema(api_info["path_params"], text, prev_api_response) + if not api_info.path_params + else gen_params_from_schema(api_info.path_params, text, prev_api_response) ) - query_params = ( + api_info.query_params = ( {} - if not api_info["query_params"] - else gen_params_from_schema(api_info["query_params"], text, prev_api_response) + if not api_info.query_params + else gen_params_from_schema(api_info.query_params, text, prev_api_response) ) - if api_info["body_schema"]: - example = gen_ex_from_schema(api_info["body_schema"]) - body_schema = gen_body_from_schema( - api_info["body_schema"], text, prev_api_response, example + if api_info.body_schema: + example = gen_ex_from_schema(api_info.body_schema) + api_info.body_schema = gen_body_from_schema( + api_info.body_schema, text, prev_api_response, example ) else: - body_schema = {} - - 
return { - "endpoint": api_info["endpoint"], - "method": api_info["method"], - "path_params": path_params, - "query_params": query_params, - "body_schema": body_schema, - "servers": api_info["servers"], - } + api_info.body_schema = {} + + return api_info diff --git a/llm-server/routes/workflow/typings/run_workflow_input.py b/llm-server/routes/workflow/typings/run_workflow_input.py index 71aa23596..f34417598 100644 --- a/llm-server/routes/workflow/typings/run_workflow_input.py +++ b/llm-server/routes/workflow/typings/run_workflow_input.py @@ -5,11 +5,9 @@ class WorkflowData: def __init__( self, text: str, - swagger_text: str, headers: Dict[str, str], server_base_url: str, ) -> None: self.text = text - self.swagger_text = swagger_text self.headers = headers self.server_base_url = server_base_url diff --git a/llm-server/routes/workflow/workflow_controller.py b/llm-server/routes/workflow/workflow_controller.py index 0563a3ec7..83cf2f18b 100644 --- a/llm-server/routes/workflow/workflow_controller.py +++ b/llm-server/routes/workflow/workflow_controller.py @@ -39,10 +39,10 @@ def get_workflow(workflow_id: str) -> Any: return jsonify({"message": "Workflow not found"}), 404 -@workflow.route("/", methods=["POST"]) +@workflow.route("/b/", methods=["POST"]) @validate_json(workflow_schema) @handle_exceptions_and_errors -def create_workflow() -> Any: +def create_workflow(bot_id: str) -> Any: workflow_data = cast(WorkflowDataType, request.json) workflows = mongo.workflows workflow_id = workflows.insert_one(workflow_data).inserted_id @@ -52,7 +52,7 @@ def create_workflow() -> Any: if namespace == "workflows": warning_message = "Warning: The 'namespace' variable is set to the generic value 'workflows'. You should replace it with a specific value for your org / user / account." 
warnings.warn(warning_message, UserWarning) - add_workflow_data_to_qdrant(namespace, workflow_id, workflow_data) + add_workflow_data_to_qdrant(namespace, workflow_id, workflow_data, bot_id) return ( jsonify({"message": "Workflow created", "workflow_id": str(workflow_id)}), @@ -60,8 +60,8 @@ def create_workflow() -> Any: ) -@workflow.route("/", methods=["GET"]) -def get_workflows() -> Any: +@workflow.route("/b/", methods=["GET"]) +def get_workflows(bot_id: str) -> Any: # Define default page and page_size values page = int(request.args.get("page", 1)) page_size = int(request.args.get("page_size", 10)) @@ -70,13 +70,15 @@ def get_workflows() -> Any: skip = (page - 1) * page_size # Query MongoDB to get a paginated list of workflows - workflows = list(mongo.workflows.find().skip(skip).limit(page_size)) + workflows = list( + mongo.workflows.find({"bot_id": bot_id}).skip(skip).limit(page_size) + ) for workflow in workflows: workflow["_id"] = str(workflow["_id"]) # Calculate the total number of workflows (for pagination metadata) - total_workflows = mongo.workflows.count_documents({}) + total_workflows = mongo.workflows.count_documents({"bot_id": bot_id}) # Prepare response data response_data = { @@ -94,7 +96,9 @@ def get_workflows() -> Any: @handle_exceptions_and_errors def update_workflow(workflow_id: str) -> Any: workflow_data = cast(WorkflowDataType, request.json) - mongo.workflows.update_one({"_id": ObjectId(workflow_id)}, {"$set": workflow_data}) + result = mongo.workflows.update_one( + {"_id": ObjectId(workflow_id)}, {"$set": workflow_data} + ) namespace = "workflows" vector_store = get_vector_store(StoreOptions(namespace)) vector_store.delete(ids=[workflow_id]) @@ -103,7 +107,9 @@ def update_workflow(workflow_id: str) -> Any: warning_message = "Warning: The 'namespace' variable is set to the generic value 'workflows'. You should replace it with a specific value for your org / user / account." 
warnings.warn(warning_message, UserWarning) - add_workflow_data_to_qdrant(namespace, workflow_id, workflow_data) + add_workflow_data_to_qdrant( + namespace, workflow_id, workflow_data, result.raw_result.get("bot_id") + ) return jsonify({"message": "Workflow updated"}), 200 @@ -121,7 +127,6 @@ def run_workflow_controller() -> Any: result = run_workflow( WorkflowData( text=data.get("text"), - swagger_url=data.get("swagger_url"), headers=data.get("headers", {}), server_base_url=data["server_base_url"], ) @@ -130,7 +135,7 @@ def run_workflow_controller() -> Any: def add_workflow_data_to_qdrant( - namespace: str, workflow_id: str, workflow_data: Any + namespace: str, workflow_id: str, workflow_data: Any, bot_id: str ) -> None: for flow in workflow_data["flows"]: docs = [ @@ -139,6 +144,8 @@ def add_workflow_data_to_qdrant( metadata={ "workflow_id": str(workflow_id), "workflow_name": workflow_data.get("name"), + "swagger_id": workflow_data.get("swagger_id"), + "bot_id": bot_id, }, ) ] diff --git a/llm-server/routes/workflow/workflow_schema.json b/llm-server/routes/workflow/workflow_schema.json index 63594887f..d4e38d2ed 100644 --- a/llm-server/routes/workflow/workflow_schema.json +++ b/llm-server/routes/workflow/workflow_schema.json @@ -6,6 +6,10 @@ "type": "string", "pattern": "^\\d+\\.\\d+$" }, + "swagger_id": { + "type": "string", + "pattern": "^[0-9a-fA-F]{24}$" + }, "info": { "type": "object", "properties": { @@ -16,7 +20,10 @@ "type": "string" } }, - "required": ["title", "version"] + "required": [ + "title", + "version" + ] }, "flows": { "type": "array", @@ -50,7 +57,10 @@ "type": "object" } }, - "required": ["operation", "open_api_operation_id"] + "required": [ + "operation", + "open_api_operation_id" + ] } }, "on_success": { @@ -87,5 +97,9 @@ } } }, - "required": ["opencopilot", "info", "flows"] -} + "required": [ + "opencopilot", + "info", + "flows" + ] +} \ No newline at end of file diff --git a/llm-server/routes/workflow/workflow_service.py b/llm-server/routes/workflow/workflow_service.py index f846cd657..fbd8bb47f 100644 --- a/llm-server/routes/workflow/workflow_service.py +++ b/llm-server/routes/workflow/workflow_service.py @@ -40,7 +40,6 @@ def get_valid_url( def run_workflow(data: WorkflowData) -> Any: text = data.text - swagger_text = data.swagger_text headers = data.headers or {} # This will come from the request payload later on when implementing multi-tenancy namespace = "workflows" @@ -63,6 +62,9 @@ def run_workflow(data: WorkflowData) -> Any: ObjectId(document.metadata["workflow_id"]) if document else None ) record = mongo.workflows.find_one({"_id": first_document_id}) + swagger_text = mongo.swagger_files.find_one( + {"_id": record.swagger_id}, {"_id": 0} + ) result = run_openapi_operations( record, swagger_text, text, headers, server_base_url @@ -94,16 +96,7 @@ def run_openapi_operations( swagger_text, text, operation_id, prev_api_response ) - api_payload["path"] = get_valid_url(api_payload, server_base_url) - api_response = make_api_request( - method=api_payload["method"], - endpoint=api_payload["endpoint"], - body_schema=api_payload["body_schema"], - path_params=api_payload["path_params"], - query_params=api_payload["query_params"], - headers=headers, - servers=api_payload["servers"], - ) + api_response = make_api_request(headers=headers, **api_payload.__dict__) record_info[operation_id] = json.loads(api_response.text) prev_api_response = api_response.text prev_api_response = "" From de50aa68b79d883d01328033ec0162b6e1dbca86 Mon Sep 17 00:00:00 2001 From: codebanesr Date: 
Sat, 30 Sep 2023 02:18:50 +0300 Subject: [PATCH 07/14] latest changes: re-enable adminer, look up swagger files by bot_id, rename swagger_text to swagger_json, and pass a score threshold to the vector search --- docker-compose.yml | 16 +++---- llm-server/routes/root_service.py | 2 +- .../workflow/generate_openapi_payload.py | 4 +- .../routes/workflow/workflow_service.py | 47 +++++++++---------- 4 files changed, 33 insertions(+), 36 deletions(-) diff --git a/docker-compose.yml index d8304ccc2..590198518 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -78,14 +78,14 @@ services: networks: - opencopilot_network - # adminer: - # image: adminer - # ports: - # - "8080:8080" - # depends_on: - # - mysql - # networks: - # - opencopilot_network + adminer: + image: adminer + ports: + - "8080:8080" + depends_on: + - mysql + networks: + - opencopilot_network qdrant: image: qdrant/qdrant diff --git a/llm-server/routes/root_service.py index b96eb12d0..97fe18f49 100644 --- a/llm-server/routes/root_service.py +++ b/llm-server/routes/root_service.py @@ -41,7 +41,7 @@ def handle_request(data: Dict[str, Any]) -> Any: raise Exception("swagger_url is required") # Check if swagger file exists in MongoDB - swagger_doc = mongo.swagger_files.find_one({"_id": swagger_url}) + swagger_doc = mongo.swagger_files.find_one({"bot_id": swagger_url}) if swagger_doc: swagger_doc["_id"] = str(swagger_doc["_id"]) diff --git a/llm-server/routes/workflow/generate_openapi_payload.py index 901f5b57b..96b21f394 100644 --- a/llm-server/routes/workflow/generate_openapi_payload.py +++ b/llm-server/routes/workflow/generate_openapi_payload.py @@ -90,9 +90,9 @@ def extract_json_payload(input_string: str) -> Optional[Any]: def generate_openapi_payload( - swagger_text: str, text: str, _operation_id: str, prev_api_response: str + swagger_json: str, text: str, _operation_id: str, prev_api_response: str ) -> ApiInfo: - parser = ResolvingParser(spec_string=swagger_text) + parser = ResolvingParser(spec_string=swagger_json) (a, b, c) = parser.version_parsed # (3,0,2), we can then apply transformation on print(a, b, c) # add transformation for swagger v2 diff --git a/llm-server/routes/workflow/workflow_service.py index fbd8bb47f..b5529296e 100644 --- a/llm-server/routes/workflow/workflow_service.py +++ b/llm-server/routes/workflow/workflow_service.py @@ -17,7 +17,7 @@ import os -VECTOR_DB_THRESHOLD = float(os.getenv("VECTOR_DB_THRESHOLD", 0.88)) +SCORE_THRESOLD = float(os.getenv("SCORE_THRESOLD", 0.88)) def get_valid_url( api_payload: Dict[str, Union[str, None]], server_base_url: Optional[str] ) -> str: @@ -38,7 +38,7 @@ def get_valid_url( raise ValueError("Missing path parameter") -def run_workflow(data: WorkflowData) -> Any: +def run_workflow(data: WorkflowData, swagger_json: Any) -> Any: text = data.text headers = data.headers or {} # This will come from the request payload later on when implementing multi-tenancy namespace = "workflows" @@ -49,40 +49,37 @@ def run_workflow(data: WorkflowData) -> Any: return json.dumps({"error": "text is required"}), 400 try: - vector_store = 
get_vector_store(StoreOptions(namespace=)) + (document, score) = vector_store.similarity_search_with_relevance_scores( + text, score_threshold=SCORE_THRESOLD + )[0] - result = run_openapi_operations( - record, swagger_text, text, headers, server_base_url - ) - return result + print( + f"Record '{document}' is highly similar with a similarity score of {score}" + ) + first_document_id = ( + ObjectId(document.metadata["workflow_id"]) if document else None + ) + record = mongo.workflows.find_one({"_id": first_document_id}) + + + result = run_openapi_operations( + record, swagger_json, text, headers, server_base_url + ) + return result except Exception as e: # Log the error, but continue with the rest of the code print(f"Error fetching data from namespace '{namespace}': {str(e)}") # Call openapi spec even if an error occurred with Qdrant - result = create_and_run_openapi_agent(swagger_text, text, headers) + result = create_and_run_openapi_agent(swagger_json, text, headers) return {"response": result} def run_openapi_operations( record: Any, - swagger_text: str, + swagger_json: str, text: str, headers: Any, server_base_url: str, @@ -93,7 +90,7 @@ def run_openapi_operations( for step in flow.get("steps"): operation_id = step.get("open_api_operation_id") api_payload = generate_openapi_payload( - swagger_text, text, operation_id, prev_api_response + swagger_json, text, operation_id, prev_api_response ) api_response = make_api_request(headers=headers, **api_payload.__dict__) From 4393a9220597fbd1bf2fa237ab9ad723ec9b1fa1 Mon Sep 17 00:00:00 2001 From: codebanesr Date: Sat, 30 Sep 2023 02:32:53 +0300 Subject: [PATCH 08/14] removing bot id and using swagger url instead --- llm-server/routes/_swagger/controller.py | 10 ++++++---- llm-server/routes/root_service.py | 8 +++++--- .../routes/workflow/typings/run_workflow_input.py | 4 +++- llm-server/routes/workflow/workflow_service.py | 3 +-- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/llm-server/routes/_swagger/controller.py b/llm-server/routes/_swagger/controller.py index 46a51319c..c92822017 100644 --- a/llm-server/routes/_swagger/controller.py +++ b/llm-server/routes/_swagger/controller.py @@ -11,8 +11,8 @@ _swagger = Blueprint("_swagger", __name__) -@_swagger.route("/b/", methods=["GET"]) -def get_swagger_files(id: str) -> Response: +@_swagger.route("/u/", methods=["GET"]) +def get_swagger_files(swagger_url: str) -> Response: # Get page and page_size query params page = int(request.args.get("page", 1)) page_size = int(request.args.get("page_size", 10)) @@ -24,11 +24,13 @@ def get_swagger_files(id: str) -> Response: # Query for paginated docs files = [ doc.update({"_id": str(doc["_id"])}) or doc - for doc in mongo.swagger_files.find({"bot_id": id}, {}).skip(skip).limit(limit) + for doc in mongo.swagger_files.find({"swagger_url": swagger_url}, {}) + .skip(skip) + .limit(limit) ] # Get total docs count - total = mongo.swagger_files.count_documents({}) + total = mongo.swagger_files.count_documents({"swagger_url": swagger_url}) # Prepare response data data = {"total": total, "page": page, "page_size": page_size, "files": files} diff --git a/llm-server/routes/root_service.py b/llm-server/routes/root_service.py index 97fe18f49..9a888b43d 100644 --- a/llm-server/routes/root_service.py +++ b/llm-server/routes/root_service.py @@ -41,7 +41,7 @@ def handle_request(data: Dict[str, Any]) -> Any: raise Exception("swagger_url is required") # Check if swagger file exists in MongoDB - swagger_doc = mongo.swagger_files.find_one({"bot_id": swagger_url}) 
+ swagger_doc = mongo.swagger_files.find_one({"meta.swagger_url": swagger_url}) if swagger_doc: swagger_doc["_id"] = str(swagger_doc["_id"]) @@ -70,12 +70,14 @@ def handle_request(data: Dict[str, Any]) -> Any: swagger_json = json.loads(swagger_text) swagger_json["bot_id"] = swagger_url.replace(shared_folder, "") mongo.swagger_files.update_one( - {"bot_id": swagger_json["bot_id"]}, {"$set": swagger_json}, True + {"meta.swagger_url": swagger_url}, {"$set": swagger_json}, True ) try: if hasMultipleIntents(text): - return run_workflow(WorkflowData(text, headers, server_base_url)) + return run_workflow( + WorkflowData(text, headers, server_base_url, swagger_url), swagger_json + ) except Exception as e: print(e) diff --git a/llm-server/routes/workflow/typings/run_workflow_input.py b/llm-server/routes/workflow/typings/run_workflow_input.py index f34417598..1cc0d0e1c 100644 --- a/llm-server/routes/workflow/typings/run_workflow_input.py +++ b/llm-server/routes/workflow/typings/run_workflow_input.py @@ -1,4 +1,4 @@ -from typing import Dict +from typing import Dict, Optional class WorkflowData: @@ -7,7 +7,9 @@ def __init__( text: str, headers: Dict[str, str], server_base_url: str, + swagger_url: str, ) -> None: self.text = text self.headers = headers self.server_base_url = server_base_url + self.swagger_url = swagger_url diff --git a/llm-server/routes/workflow/workflow_service.py b/llm-server/routes/workflow/workflow_service.py index b5529296e..1588dbdbe 100644 --- a/llm-server/routes/workflow/workflow_service.py +++ b/llm-server/routes/workflow/workflow_service.py @@ -49,7 +49,7 @@ def run_workflow(data: WorkflowData, swagger_json: Any) -> Any: return json.dumps({"error": "text is required"}), 400 try: - vector_store = get_vector_store(StoreOptions(namespace=)) + vector_store = get_vector_store(StoreOptions(namespace=data.swagger_url)) (document, score) = vector_store.similarity_search_with_relevance_scores( text, score_threshold=SCORE_THRESOLD )[0] @@ -62,7 +62,6 @@ def run_workflow(data: WorkflowData, swagger_json: Any) -> Any: ) record = mongo.workflows.find_one({"_id": first_document_id}) - result = run_openapi_operations( record, swagger_json, text, headers, server_base_url ) From 0211ee2813924867de6b4faf93b5ed1bcb90c7c9 Mon Sep 17 00:00:00 2001 From: codebanesr Date: Sat, 30 Sep 2023 03:09:01 +0300 Subject: [PATCH 09/14] Give this code a refactor --- llm-server/routes/_swagger/controller.py | 4 +- llm-server/routes/root_service.py | 63 ++++++++++++------- .../routes/workflow/workflow_controller.py | 30 ++++----- 3 files changed, 54 insertions(+), 43 deletions(-) diff --git a/llm-server/routes/_swagger/controller.py b/llm-server/routes/_swagger/controller.py index c92822017..30f8f0759 100644 --- a/llm-server/routes/_swagger/controller.py +++ b/llm-server/routes/_swagger/controller.py @@ -24,13 +24,13 @@ def get_swagger_files(swagger_url: str) -> Response: # Query for paginated docs files = [ doc.update({"_id": str(doc["_id"])}) or doc - for doc in mongo.swagger_files.find({"swagger_url": swagger_url}, {}) + for doc in mongo.swagger_files.find({"meta.swagger_url": swagger_url}, {}) .skip(skip) .limit(limit) ] # Get total docs count - total = mongo.swagger_files.count_documents({"swagger_url": swagger_url}) + total = mongo.swagger_files.count_documents({"meta.swagger_url": swagger_url}) # Prepare response data data = {"total": total, "page": page, "page_size": page_size, "files": files} diff --git a/llm-server/routes/root_service.py b/llm-server/routes/root_service.py index 9a888b43d..f6b49f355 
100644 --- a/llm-server/routes/root_service.py +++ b/llm-server/routes/root_service.py @@ -24,48 +24,61 @@ shared_folder = os.getenv("SHARED_FOLDER", "/app/shared_data/") +# Define constants for error messages +BASE_PROMPT_REQUIRED = "base_prompt is required" +TEXT_REQUIRED = "text is required" +SWAGGER_URL_REQUIRED = "swagger_url is required" +FAILED_TO_FETCH_SWAGGER_CONTENT = "Failed to fetch Swagger content" +FILE_NOT_FOUND = "File not found" +FAILED_TO_CALL_API_ENDPOINT = "Failed to call or map API endpoint" + + +def fetch_swagger_text(swagger_url: str) -> str: + if swagger_url.startswith("https://"): + response = requests.get(swagger_url) + if response.status_code == 200: + return response.text + else: + raise Exception(FAILED_TO_FETCH_SWAGGER_CONTENT) + else: + try: + with open(swagger_url, "r") as file: + return file.read() + except FileNotFoundError: + raise Exception(FILE_NOT_FOUND) + + def handle_request(data: Dict[str, Any]) -> Any: text = data.get("text") - swagger_url = cast(str, data.get("swagger_url")) - base_prompt = data.get("base_prompt") + swagger_url = cast(str, data.get("swagger_url", "")) + base_prompt = data.get("base_prompt", "") headers = data.get("headers", {}) - server_base_url = cast(str, data.get("server_base_url")) + server_base_url = cast(str, data.get("server_base_url", "")) + # Check if required fields are present if not base_prompt: - raise Exception("base_prompt is required") + raise Exception(BASE_PROMPT_REQUIRED) if not text: - raise Exception("text is required") + raise Exception(TEXT_REQUIRED) if not swagger_url: - raise Exception("swagger_url is required") + raise Exception(SWAGGER_URL_REQUIRED) # Check if swagger file exists in MongoDB - swagger_doc = mongo.swagger_files.find_one({"meta.swagger_url": swagger_url}) + swagger_doc = mongo.swagger_files.find_one( + {"meta.swagger_url": swagger_url}, {"meta": 0, "_id": 0} + ) if swagger_doc: - swagger_doc["_id"] = str(swagger_doc["_id"]) swagger_text = swagger_doc else: - if swagger_url.startswith("https://"): - pass - else: + if not swagger_url.startswith("https://"): swagger_url = shared_folder + swagger_url print(f"swagger_url::{swagger_url}") - if swagger_url.startswith("https://"): - response = requests.get(swagger_url) - if response.status_code == 200: - swagger_text = response.text - else: - raise Exception("Failed to fetch Swagger content") - else: - try: - with open(swagger_url, "r") as file: - swagger_text = file.read() - except FileNotFoundError: - raise Exception("File not found") + swagger_text = fetch_swagger_text(swagger_url) swagger_json = json.loads(swagger_text) swagger_json["bot_id"] = swagger_url.replace(shared_folder, "") @@ -73,6 +86,8 @@ def handle_request(data: Dict[str, Any]) -> Any: {"meta.swagger_url": swagger_url}, {"$set": swagger_json}, True ) + swagger_json = swagger_doc or swagger_json + try: if hasMultipleIntents(text): return run_workflow( @@ -86,7 +101,7 @@ def handle_request(data: Dict[str, Any]) -> Any: try: json_output = try_to_match_and_call_api_endpoint(swagger_spec, text, headers) except Exception as e: - logging.error(f"Failed to call or map API endpoint: {str(e)}") + logging.error(f"{FAILED_TO_CALL_API_ENDPOINT}: {str(e)}") logging.error("Exception traceback:\n" + traceback.format_exc()) json_output = None diff --git a/llm-server/routes/workflow/workflow_controller.py b/llm-server/routes/workflow/workflow_controller.py index 83cf2f18b..f386e789a 100644 --- a/llm-server/routes/workflow/workflow_controller.py +++ b/llm-server/routes/workflow/workflow_controller.py 
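To make the end state of this root_service refactor easier to follow, here is a minimal sketch of the cache-or-fetch lookup it converges on. The pymongo wiring and database name are assumptions for illustration; `fetch_swagger_text` and the `meta.swagger_url` key come from the patches themselves:

```python
# Sketch: resolve a Swagger document, preferring the MongoDB copy keyed by
# meta.swagger_url and falling back to HTTP or the shared folder on disk.
import json
import os

import requests
from pymongo import MongoClient

shared_folder = os.getenv("SHARED_FOLDER", "/app/shared_data/")
# Connection string and database name are illustrative assumptions.
mongo = MongoClient(os.getenv("MONGODB_URL", "mongodb://localhost:27017"))["opencopilot"]


def fetch_swagger_text(swagger_url: str) -> str:
    if swagger_url.startswith("https://"):
        response = requests.get(swagger_url)
        if response.status_code == 200:
            return response.text
        raise Exception("Failed to fetch Swagger content")
    with open(shared_folder + swagger_url, "r") as file:
        return file.read()


def load_swagger_doc(swagger_url: str) -> dict:
    # Cached copy first, with Mongo-internal fields projected away.
    cached = mongo.swagger_files.find_one(
        {"meta.swagger_url": swagger_url}, {"meta": 0, "_id": 0}
    )
    return cached or json.loads(fetch_swagger_text(swagger_url))
```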
@@ -39,20 +39,15 @@ def get_workflow(workflow_id: str) -> Any: return jsonify({"message": "Workflow not found"}), 404 -@workflow.route("/b/", methods=["POST"]) +@workflow.route("/u/", methods=["POST"]) @validate_json(workflow_schema) @handle_exceptions_and_errors -def create_workflow(bot_id: str) -> Any: +def create_workflow(swagger_url: str) -> Any: workflow_data = cast(WorkflowDataType, request.json) workflows = mongo.workflows workflow_id = workflows.insert_one(workflow_data).inserted_id - namespace = "workflows" - # Check if the namespace is generic - if namespace == "workflows": - warning_message = "Warning: The 'namespace' variable is set to the generic value 'workflows'. You should replace it with a specific value for your org / user / account." - warnings.warn(warning_message, UserWarning) - add_workflow_data_to_qdrant(namespace, workflow_id, workflow_data, bot_id) + add_workflow_data_to_qdrant(workflow_id, workflow_data, swagger_url) return ( jsonify({"message": "Workflow created", "workflow_id": str(workflow_id)}), @@ -102,13 +97,9 @@ def update_workflow(workflow_id: str) -> Any: namespace = "workflows" vector_store = get_vector_store(StoreOptions(namespace)) vector_store.delete(ids=[workflow_id]) - # Check if the namespace is generic - if namespace == "workflows": - warning_message = "Warning: The 'namespace' variable is set to the generic value 'workflows'. You should replace it with a specific value for your org / user / account." - warnings.warn(warning_message, UserWarning) add_workflow_data_to_qdrant( - namespace, workflow_id, workflow_data, result.raw_result.get("bot_id") + workflow_id, workflow_data, result.raw_result.get("bot_id") ) return jsonify({"message": "Workflow updated"}), 200 @@ -124,18 +115,23 @@ def delete_workflow(workflow_id: str) -> Any: @handle_exceptions_and_errors def run_workflow_controller() -> Any: data = request.get_json() + + swagger_url = data.get("swagger_url") + swagger_json = mongo.swagger_files.find_one({"meta.swagger_url": swagger_url}) result = run_workflow( WorkflowData( text=data.get("text"), headers=data.get("headers", {}), server_base_url=data["server_base_url"], - ) + swagger_url=data.get("swagger_url"), + ), + swagger_json, ) return result def add_workflow_data_to_qdrant( - namespace: str, workflow_id: str, workflow_data: Any, bot_id: str + workflow_id: str, workflow_data: Any, swagger_url: str ) -> None: for flow in workflow_data["flows"]: docs = [ @@ -145,9 +141,9 @@ def add_workflow_data_to_qdrant( "workflow_id": str(workflow_id), "workflow_name": workflow_data.get("name"), "swagger_id": workflow_data.get("swagger_id"), - "bot_id": bot_id, + "swagger_url": swagger_url, }, ) ] embeddings = get_embeddings() - init_vector_store(docs, embeddings, StoreOptions(namespace)) + init_vector_store(docs, embeddings, StoreOptions(swagger_url)) From 4d12b06c8225eb30955c32b2b698f877c85c89e7 Mon Sep 17 00:00:00 2001 From: codebanesr Date: Sat, 30 Sep 2023 04:26:37 +0300 Subject: [PATCH 10/14] minor update -> much better progress --- llm-server/routes/root_service.py | 67 ++++++------------- .../workflow/extractors/extract_body.py | 4 +- .../workflow/extractors/extract_param.py | 2 +- .../workflow/generate_openapi_payload.py | 10 ++- 4 files changed, 32 insertions(+), 51 deletions(-) diff --git a/llm-server/routes/root_service.py b/llm-server/routes/root_service.py index f6b49f355..d496b0096 100644 --- a/llm-server/routes/root_service.py +++ b/llm-server/routes/root_service.py @@ -38,65 +38,42 @@ def fetch_swagger_text(swagger_url: str) -> str: 
response = requests.get(swagger_url) if response.status_code == 200: return response.text - else: - raise Exception(FAILED_TO_FETCH_SWAGGER_CONTENT) - else: - try: - with open(swagger_url, "r") as file: - return file.read() - except FileNotFoundError: - raise Exception(FILE_NOT_FOUND) + raise Exception(FAILED_TO_FETCH_SWAGGER_CONTENT) + try: + with open(shared_folder + swagger_url, "r") as file: + return file.read() + except FileNotFoundError: + raise Exception(FILE_NOT_FOUND) def handle_request(data: Dict[str, Any]) -> Any: - text = data.get("text") + text: str = cast(str, data.get("text")) swagger_url = cast(str, data.get("swagger_url", "")) base_prompt = data.get("base_prompt", "") headers = data.get("headers", {}) server_base_url = cast(str, data.get("server_base_url", "")) - # Check if required fields are present - if not base_prompt: - raise Exception(BASE_PROMPT_REQUIRED) - - if not text: - raise Exception(TEXT_REQUIRED) - - if not swagger_url: - raise Exception(SWAGGER_URL_REQUIRED) + for required_field, error_msg in [ + ("base_prompt", BASE_PROMPT_REQUIRED), + ("text", TEXT_REQUIRED), + ("swagger_url", SWAGGER_URL_REQUIRED), + ]: + if not locals()[required_field]: + raise Exception(error_msg) - # Check if swagger file exists in MongoDB swagger_doc = mongo.swagger_files.find_one( {"meta.swagger_url": swagger_url}, {"meta": 0, "_id": 0} - ) - - if swagger_doc: - swagger_text = swagger_doc - else: - if not swagger_url.startswith("https://"): - swagger_url = shared_folder + swagger_url - - print(f"swagger_url::{swagger_url}") - - swagger_text = fetch_swagger_text(swagger_url) - - swagger_json = json.loads(swagger_text) - swagger_json["bot_id"] = swagger_url.replace(shared_folder, "") - mongo.swagger_files.update_one( - {"meta.swagger_url": swagger_url}, {"$set": swagger_json}, True - ) - - swagger_json = swagger_doc or swagger_json + ) or json.loads(fetch_swagger_text(swagger_url)) try: if hasMultipleIntents(text): return run_workflow( - WorkflowData(text, headers, server_base_url, swagger_url), swagger_json + WorkflowData(text, headers, server_base_url, swagger_url), swagger_doc ) except Exception as e: print(e) - swagger_spec = OpenAPISpec.from_text(swagger_text) + swagger_spec = OpenAPISpec.from_text(fetch_swagger_text(swagger_url)) try: json_output = try_to_match_and_call_api_endpoint(swagger_spec, text, headers) @@ -107,11 +84,11 @@ def handle_request(data: Dict[str, Any]) -> Any: llm = ChatOpenAI(model="gpt-3.5-turbo-0613", temperature=0) - if json_output is None: - prompt_msgs = non_api_base_prompt(base_prompt, text) - else: - prompt_msgs = api_base_prompt(base_prompt, text, json_output) - + prompt_msgs = ( + non_api_base_prompt(base_prompt, text) + if json_output is None + else api_base_prompt(base_prompt, text, json_output) + ) prompt = ChatPromptTemplate(messages=prompt_msgs) chain = create_structured_output_chain(AiResponseFormat, llm, prompt, verbose=False) return chain.run(question=text).dict() diff --git a/llm-server/routes/workflow/extractors/extract_body.py b/llm-server/routes/workflow/extractors/extract_body.py index 1573b4608..7e2c2efe5 100644 --- a/llm-server/routes/workflow/extractors/extract_body.py +++ b/llm-server/routes/workflow/extractors/extract_body.py @@ -12,14 +12,14 @@ def gen_body_from_schema( - body_schema: JsonData, text: str, prev_api_response: str, example: str + body_schema: str, text: str, prev_api_response: str, example: str ) -> Any: _DEFAULT_TEMPLATE = """To enable a substantially intelligent language model to execute a series of APIs 
sequentially, the following essential details are necessary to gather information needed for the next API call: 1. Initial input when starting the flow: `{text}` 2. Previous API responses: `{prev_api_response}` 3. A JSON response schema that defines the expected format: `{body_schema}` - Here is a dummy example for expected output: ```{example}``` + Try to adhere to this sample api payload as much as possible: ```{example}``` The JSON payload, enclosed within triple backticks on both sides, strictly conforming to the specified "type/format" as outlined in the schema is as follows: """ diff --git a/llm-server/routes/workflow/extractors/extract_param.py b/llm-server/routes/workflow/extractors/extract_param.py index 637894829..985df9c7b 100644 --- a/llm-server/routes/workflow/extractors/extract_param.py +++ b/llm-server/routes/workflow/extractors/extract_param.py @@ -11,7 +11,7 @@ def gen_params_from_schema( - param_schema: JsonData, text: str, prev_resp: str + param_schema: str, text: str, prev_resp: str ) -> Optional[JsonData]: """Extracts API parameters from a schema based on user text and previous response. diff --git a/llm-server/routes/workflow/generate_openapi_payload.py b/llm-server/routes/workflow/generate_openapi_payload.py index 96b21f394..5d3e21eb5 100644 --- a/llm-server/routes/workflow/generate_openapi_payload.py +++ b/llm-server/routes/workflow/generate_openapi_payload.py @@ -102,18 +102,22 @@ def generate_openapi_payload( api_info.path_params = ( {} if not api_info.path_params - else gen_params_from_schema(api_info.path_params, text, prev_api_response) + else gen_params_from_schema( + json.dumps(api_info.path_params), text, prev_api_response + ) ) api_info.query_params = ( {} if not api_info.query_params - else gen_params_from_schema(api_info.query_params, text, prev_api_response) + else gen_params_from_schema( + json.dumps(api_info.query_params), text, prev_api_response + ) ) if api_info.body_schema: example = gen_ex_from_schema(api_info.body_schema) api_info.body_schema = gen_body_from_schema( - api_info.body_schema, text, prev_api_response, example + json.dumps(api_info.body_schema), text, prev_api_response, example ) else: api_info.body_schema = {} From af57947daa995673e81116f7cda4ef4d4042a27c Mon Sep 17 00:00:00 2001 From: codebanesr Date: Sat, 30 Sep 2023 05:44:50 +0300 Subject: [PATCH 11/14] Adding a limit on planner execution --- llm-server/api_caller/planner.py | 3 +++ llm-server/routes/root_service.py | 2 +- .../routes/workflow/hierarchical_planner.py | 8 ++++---- .../routes/workflow/workflow_service.py | 2 +- llm-server/utils/detect_multiple_intents.py | 19 +++++++------------ 5 files changed, 16 insertions(+), 18 deletions(-) diff --git a/llm-server/api_caller/planner.py b/llm-server/api_caller/planner.py index d218265b2..8d30bb808 100644 --- a/llm-server/api_caller/planner.py +++ b/llm-server/api_caller/planner.py @@ -359,5 +359,8 @@ def create_openapi_agent( callback_manager=callback_manager, verbose=verbose, maxIterations=2, + early_stopping_method="generate", # allow additional pass + max_execution_time=10, # kill after 40 seconds + handle_parsing_errors=True, **(agent_executor_kwargs or {}), ) diff --git a/llm-server/routes/root_service.py b/llm-server/routes/root_service.py index d496b0096..70a2c8847 100644 --- a/llm-server/routes/root_service.py +++ b/llm-server/routes/root_service.py @@ -66,7 +66,7 @@ def handle_request(data: Dict[str, Any]) -> Any: ) or json.loads(fetch_swagger_text(swagger_url)) try: - if hasMultipleIntents(text): + if not 
hasSingleIntent(swagger_doc, text): return run_workflow( WorkflowData(text, headers, server_base_url, swagger_url), swagger_doc ) diff --git a/llm-server/routes/workflow/hierarchical_planner.py b/llm-server/routes/workflow/hierarchical_planner.py index 8e44da762..17bea9ce8 100644 --- a/llm-server/routes/workflow/hierarchical_planner.py +++ b/llm-server/routes/workflow/hierarchical_planner.py @@ -15,11 +15,11 @@ def create_and_run_openapi_agent( - swagger_text: str, user_query: str, headers: Dict[str, str] = {} + swagger_json: Any, user_query: str, headers: Dict[str, str] = {} ) -> Any: # Load OpenAPI spec - raw_spec = json.loads(swagger_text) - spec = reduce_openapi_spec(raw_spec) + # raw_spec = json.loads(swagger_json) + spec = reduce_openapi_spec(swagger_json) # Create RequestsWrapper with auth requests_wrapper: RequestsWrapper = RequestsWrapper(headers=headers) @@ -28,7 +28,7 @@ def create_and_run_openapi_agent( f"Using {PLAN_AND_EXECUTE_MODEL} for plan and execute agent, you can change it by setting PLAN_AND_EXECUTE_MODEL variable" ) # Create OpenAPI agent - llm: OpenAI = OpenAI(model_name=PLAN_AND_EXECUTE_MODEL, temperature=0.0) + llm: OpenAI = OpenAI(temperature=0.0) agent = planner.create_openapi_agent(spec, requests_wrapper, llm) # Run agent on user query diff --git a/llm-server/routes/workflow/workflow_service.py b/llm-server/routes/workflow/workflow_service.py index 1588dbdbe..99b74575d 100644 --- a/llm-server/routes/workflow/workflow_service.py +++ b/llm-server/routes/workflow/workflow_service.py @@ -65,7 +65,7 @@ def run_workflow(data: WorkflowData, swagger_json: Any) -> Any: result = run_openapi_operations( record, swagger_json, text, headers, server_base_url ) - return result + return {"response": result} except Exception as e: # Log the error, but continue with the rest of the code diff --git a/llm-server/utils/detect_multiple_intents.py b/llm-server/utils/detect_multiple_intents.py index df8290912..c3e3da6f9 100644 --- a/llm-server/utils/detect_multiple_intents.py +++ b/llm-server/utils/detect_multiple_intents.py @@ -48,20 +48,13 @@ def hasMultipleIntents(user_input: str) -> bool: # print(json.dumps(result, indent=2)) -def getSummaries(swagger_text: str): +def getSummaries(swagger_doc: Any): """Get API endpoint summaries from an OpenAPI spec.""" summaries: List[str] = [] - # Load the OpenAPI spec - spec_dict: Optional[Dict[str, Any]] = json.loads(swagger_text) - if not spec_dict: - raise ValueError("Unable to load OpenAPI spec") - - json_spec: JsonSpec = JsonSpec(dict_=spec_dict, max_value_length=4000) - # Get the paths and iterate over them - paths: Optional[Dict[str, Any]] = json_spec.dict_.get("paths") + paths: Optional[Dict[str, Any]] = swagger_doc.get("paths") if not paths: raise ValueError("OpenAPI spec missing 'paths'") @@ -69,13 +62,15 @@ def getSummaries(swagger_text: str): operation = paths[path] for field in operation: if "summary" in operation[field]: - summaries.append(operation[field]["operationId"]) + summaries.append( + f"""{operation[field]["operationId"]} - {operation[field]["description"]}""" + ) return summaries -def hasSingleIntent(swagger_text: str, user_requirement: str) -> bool: - summaries = getSummaries(swagger_text) +def hasSingleIntent(swagger_doc: Any, user_requirement: str) -> bool: + summaries = getSummaries(swagger_doc) _DEFAULT_TEMPLATE = """ User: Here is a list of API summaries: {summaries} From 847a87a02d9d4f0dd6b11bd3f971974a58600c26 Mon Sep 17 00:00:00 2001 From: codebanesr Date: Sat, 30 Sep 2023 05:55:08 +0300 Subject: [PATCH 12/14] 
Adding the PUT tool to the list of tools that the agent is allowed to use --- llm-server/api_caller/planner.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/llm-server/api_caller/planner.py index 8d30bb808..a31e37f8c 100644 --- a/llm-server/api_caller/planner.py +++ b/llm-server/api_caller/planner.py @@ -245,6 +245,7 @@ def _create_api_controller_agent( ) -> AgentExecutor: get_llm_chain = LLMChain(llm=llm, prompt=PARSING_GET_PROMPT) post_llm_chain = LLMChain(llm=llm, prompt=PARSING_POST_PROMPT) + put_llm_chain = LLMChain(llm=llm, prompt=PARSING_PUT_PROMPT) tools: List[BaseTool] = [ RequestsGetToolWithParsing( requests_wrapper=requests_wrapper, llm_chain=get_llm_chain @@ -252,6 +253,9 @@ RequestsPostToolWithParsing( requests_wrapper=requests_wrapper, llm_chain=post_llm_chain ), + RequestsPutToolWithParsing( + requests_wrapper=requests_wrapper, llm_chain=put_llm_chain + ), ] prompt = PromptTemplate( template=API_CONTROLLER_PROMPT, @@ -359,8 +363,8 @@ def create_openapi_agent( callback_manager=callback_manager, verbose=verbose, maxIterations=2, - early_stopping_method="generate", # allow additional pass - max_execution_time=10, # kill after 40 seconds + early_stopping_method="generate", # allow one last pass to generate correct response + max_execution_time=20, # kill after 20 seconds handle_parsing_errors=True, **(agent_executor_kwargs or {}), ) From 37e14d6ba17b75702875217473116e13be4aee92 Mon Sep 17 00:00:00 2001 From: codebanesr Date: Sat, 30 Sep 2023 05:59:50 +0300 Subject: [PATCH 13/14] Adding delete and patch parsing chains (tools still commented out) --- llm-server/api_caller/planner.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/llm-server/api_caller/planner.py index a31e37f8c..d0e69d64b 100644 --- a/llm-server/api_caller/planner.py +++ b/llm-server/api_caller/planner.py @@ -246,6 +246,8 @@ def _create_api_controller_agent( get_llm_chain = LLMChain(llm=llm, prompt=PARSING_GET_PROMPT) post_llm_chain = LLMChain(llm=llm, prompt=PARSING_POST_PROMPT) put_llm_chain = LLMChain(llm=llm, prompt=PARSING_PUT_PROMPT) + delete_llm_chain = LLMChain(llm=llm, prompt=PARSING_DELETE_PROMPT) + patch_llm_chain = LLMChain(llm=llm, prompt=PARSING_PATCH_PROMPT) tools: List[BaseTool] = [ RequestsGetToolWithParsing( requests_wrapper=requests_wrapper, llm_chain=get_llm_chain @@ -256,6 +258,12 @@ RequestsPutToolWithParsing( requests_wrapper=requests_wrapper, llm_chain=put_llm_chain ), + # RequestsDeleteToolWithParsing( + # requests_wrapper=requests_wrapper, llm_chain=delete_llm_chain + # ), + # RequestsPatchToolWithParsing( + # requests_wrapper=requests_wrapper, llm_chain=patch_llm_chain + # ), ] prompt = PromptTemplate( template=API_CONTROLLER_PROMPT, @@ -297,7 +305,7 @@ def _create_and_run_api_controller_agent(plan_str: str) -> str: "{method} {route}".format(method=method, route=route.split("?")[0]) for method, route in matches ] - endpoint_docs_by_name = {name: docs for name, _, docs in api_spec.endpoints} + # endpoint_docs_by_name = {name: docs for name, _, docs in api_spec.endpoints} docs_str = "" for endpoint_name in endpoint_names: found_match = False From e29cdf68ad658010521b9b9a1b94065116beeb82 Mon Sep 17 00:00:00 2001 From: codebanesr Date: Sat, 30 Sep 2023 08:18:33 +0300 Subject: [PATCH 14/14] fixing some prompt logic --- llm-server/requirements.txt | 3 +++ llm-server/routes/root_service.py | 7 ++++++- llm-server/utils/detect_multiple_intents.py |
20 +++++++++++++------- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/llm-server/requirements.txt index 12b1d7bf6..81b7a9a8a 100644 --- a/llm-server/requirements.txt +++ b/llm-server/requirements.txt @@ -78,6 +78,7 @@ pathspec==0.11.2 pathy==0.10.2 pinecone-client==2.2.2 portalocker==2.7.0 +prance==23.6.21.0 preshed==3.0.8 protobuf==4.24.2 pydantic==1.10.11 @@ -92,6 +93,8 @@ regex==2023.6.3 requests==2.31.0 rfc3339-validator==0.1.4 rpds-py==0.10.2 +ruamel.yaml==0.17.32 +ruamel.yaml.clib==0.2.7 safetensors==0.3.1 six==1.16.0 smart-open==6.3.0 diff --git a/llm-server/routes/root_service.py index 70a2c8847..2d06f7b31 100644 --- a/llm-server/routes/root_service.py +++ b/llm-server/routes/root_service.py @@ -66,10 +66,15 @@ def handle_request(data: Dict[str, Any]) -> Any: ) or json.loads(fetch_swagger_text(swagger_url)) try: - if not hasSingleIntent(swagger_doc, text): + k = hasSingleIntent(swagger_doc, text) + if k == False: return run_workflow( WorkflowData(text, headers, server_base_url, swagger_url), swagger_doc ) + elif k == True: + raise Exception("Try match and call") + else: + return {"response": k} except Exception as e: print(e) diff --git a/llm-server/utils/detect_multiple_intents.py index c3e3da6f9..401ff8662 100644 --- a/llm-server/utils/detect_multiple_intents.py +++ b/llm-server/utils/detect_multiple_intents.py @@ -72,13 +72,18 @@ def hasSingleIntent(swagger_doc: Any, user_requirement: str) -> bool: summaries = getSummaries(swagger_doc) _DEFAULT_TEMPLATE = """ - User: Here is a list of API summaries: - {summaries} + You are an AI chatbot equipped with the capability to interact with APIs on behalf of users. However, users may also ask you general questions that do not necessitate API calls. - Can one of these api's suffice the users request? Please reply with either "YES" or "NO" with explanation + **User Input:** + ``` + User: Here is a list of API summaries: + {summaries} - User requirement: - {user_requirement} + If the request can be completed with a single API call, please reply with "__ONE__". If it requires multiple API calls, respond with "__MULTIPLE__". If the query is a general question and does not require an API call, provide the answer to the question. + + User Requirement: + {user_requirement} + ``` """ llm = get_llm() PROMPT = PromptTemplate( @@ -100,7 +104,9 @@ def hasSingleIntent(swagger_doc: Any, user_requirement: str) -> bool: print(f"Summary call response: {response}") - if "yes" in response.lower(): + if "__ONE__" in response.upper(): return True - else: + elif "__MULTIPLE__" in response.upper(): return False + else: + return response
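With the fix above, the `raise Exception("Try match and call")` branch is deliberately caught by the surrounding `except`, which lets control fall through to the single-endpoint matching path. Pulling the three-way contract of `hasSingleIntent` out of the diff, a caller can be sketched as follows; everything except `hasSingleIntent` is a hypothetical placeholder, not code from the repository:

```python
# Sketch: consuming hasSingleIntent's three-way result after patch 14.
# True  -> one endpoint suffices; try direct endpoint matching.
# False -> multiple intents; hand off to the workflow engine.
# str   -> a general question; the model's text already is the answer.
from typing import Any, Union

from utils.detect_multiple_intents import hasSingleIntent


def match_single_endpoint(text: str) -> Any:  # placeholder
    ...


def run_multi_step_workflow(text: str) -> Any:  # placeholder
    ...


def route_user_input(swagger_doc: Any, text: str) -> Any:
    k: Union[bool, str] = hasSingleIntent(swagger_doc, text)
    if k is True:
        return match_single_endpoint(text)
    if k is False:
        return run_multi_step_workflow(text)
    return {"response": k}
```

Note that once `hasSingleIntent` can return the model's own answer, its `-> bool` annotation understates the contract; `Union[bool, str]` would describe it accurately.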