Merge pull request #124 from openchatai/custom_planner_3
Custom API Planner - Enhancements and Adjustments
codebanesr authored Oct 6, 2023
2 parents 774a8f2 + caa37a8 commit 782ea77
Showing 20 changed files with 426 additions and 286 deletions.
2 changes: 1 addition & 1 deletion llm-server/api_caller/base.py
@@ -5,7 +5,7 @@


def try_to_match_and_call_api_endpoint(
swagger_spec: OpenAPISpec, text: str, headers: Dict[str, str]
swagger_spec: OpenAPISpec, text: str, headers: Dict[str, str]
) -> str:
openapi_call_chain = get_openapi_chain(swagger_spec, verbose=True, headers=headers)

2 changes: 2 additions & 0 deletions llm-server/readme.md
@@ -63,6 +63,8 @@ To install Mypy, which is a static type checker for Python, follow these steps:
MONGODB_URL=mongodb://localhost:27017/opencopilot
QDRANT_URL=http://localhost:6333
STORE=QDRANT
QDRANT_API_KEY= # When using the cloud-hosted version
SCORE_THRESHOLD=0.95 # When using pre-defined workflows, the confidence score at which OpenCopilot should select your workflow. If the score falls below this, the planner will design its own workflow.
```

Ensure you replace the placeholders with your actual API keys and configuration settings.
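
For context, SCORE_THRESHOLD acts as a confidence cutoff on the vector-store match for pre-defined workflows. A minimal sketch of how such a gate might work (the select_workflow helper is hypothetical; in this commit the comparison presumably happens inside check_workflow_in_store):

```
import os

# Hypothetical sketch, not the repository's actual implementation.
SCORE_THRESHOLD = float(os.getenv("SCORE_THRESHOLD", "0.95"))


def select_workflow(document, score):
    # Reuse the stored workflow only when the similarity score clears
    # the configured threshold; returning None signals the planner to
    # design its own workflow instead.
    if document is not None and score >= SCORE_THRESHOLD:
        return document
    return None
```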
85 changes: 29 additions & 56 deletions llm-server/routes/root_service.py
@@ -3,7 +3,6 @@
from typing import Dict, Any, cast

import logging
import requests
import traceback
from dotenv import load_dotenv
from langchain.chains.openai_functions import create_structured_output_chain
@@ -13,16 +12,19 @@
from models.models import AiResponseFormat
from prompts.base import api_base_prompt, non_api_base_prompt
from routes.workflow.typings.run_workflow_input import WorkflowData
from routes.workflow.workflow_service import run_workflow
from utils.detect_multiple_intents import hasSingleIntent
from routes.workflow.utils import (
run_workflow,
check_workflow_in_store,
fetch_swagger_text,
hasSingleIntent,
create_workflow_from_operation_ids,
)
from bson import ObjectId
import os
from dotenv import load_dotenv
from typing import Dict, Any, cast
from utils.db import Database
from utils.detect_multiple_intents import hasSingleIntent
import json
import yaml
from yaml.parser import ParserError
from api_caller.base import try_to_match_and_call_api_endpoint

db_instance = Database()
@@ -40,48 +42,6 @@
FAILED_TO_CALL_API_ENDPOINT = "Failed to call or map API endpoint"


def fetch_swagger_text(swagger_url: str) -> str:
if swagger_url.startswith("https://"):
response = requests.get(swagger_url)
if response.status_code == 200:
try:
# Try parsing the content as JSON
json_content = json.loads(response.text)
return json.dumps(json_content, indent=2)
except json.JSONDecodeError:
try:
# Try parsing the content as YAML
yaml_content = yaml.safe_load(response.text)
if isinstance(yaml_content, dict):
return json.dumps(yaml_content, indent=2)
else:
raise Exception("Invalid YAML content")
except ParserError:
raise Exception("Failed to parse content as JSON or YAML")

raise Exception("Failed to fetch Swagger content")

try:
with open(shared_folder + swagger_url, "r") as file:
content = file.read()
try:
# Try parsing the content as JSON
json_content = json.loads(content)
return json.dumps(json_content, indent=2)
except json.JSONDecodeError:
try:
# Try parsing the content as YAML
yaml_content = yaml.safe_load(content)
if isinstance(yaml_content, dict):
return json.dumps(yaml_content, indent=2)
else:
raise Exception("Invalid YAML content")
except ParserError:
raise Exception("Failed to parse content as JSON or YAML")
except FileNotFoundError:
raise Exception("File not found")


def handle_request(data: Dict[str, Any]) -> Any:
text: str = cast(str, data.get("text"))
swagger_url = cast(str, data.get("swagger_url", ""))
@@ -99,7 +59,7 @@ def handle_request(data: Dict[str, Any]) -> Any:
if not locals()[required_field]:
raise Exception(error_msg)

swagger_doc = mongo.swagger_files.find_one(
swagger_doc: Dict[str, Any] = mongo.swagger_files.find_one(
{"meta.swagger_url": swagger_url}, {"meta": 0, "_id": 0}
) or json.loads(fetch_swagger_text(swagger_url))

@@ -114,9 +74,26 @@ def handle_request(data: Dict[str, Any]) -> Any:
"[OpenCopilot] Apparently, the user request require calling more than single API endpoint "
"to get the job done"
)

# Check if a workflow exists in MongoDB; if present, use it; otherwise ask the
# planner to create one from the operation summaries, then call run_workflow on it
(document, score) = check_workflow_in_store(text, swagger_url)

_workflow = None
if document:
_workflow = mongo.workflows.find_one(
{"_id": ObjectId(document.metadata["workflow_id"])}
)
else:
_workflow = create_workflow_from_operation_ids(
bot_response.ids, SWAGGER_SPEC=swagger_doc
)
return run_workflow(
WorkflowData(text, headers, server_base_url, swagger_url), swagger_doc
_workflow,
swagger_doc,
WorkflowData(text, headers, server_base_url, swagger_url),
)

elif len(bot_response.ids) == 0:
logging.info("[OpenCopilot] The user request doesnot require an api call")
return {"response": bot_response.bot_message}
Expand All @@ -125,9 +102,7 @@ def handle_request(data: Dict[str, Any]) -> Any:
logging.info(
"[OpenCopilot] The user request can be handled in single API call"
)
raise "Falling back to planner"
# else:
# return {"": k}

except Exception as e:
logging.info(
"[OpenCopilot] Something went wrong when try to get how many calls is required"
Expand All @@ -145,9 +120,7 @@ def handle_request(data: Dict[str, Any]) -> Any:
)
json_output = try_to_match_and_call_api_endpoint(swagger_spec, text, headers)

formatted_response = json.dumps(
json_output, indent=4
) # Indent the JSON with 4 spaces
formatted_response = json.dumps(json_output, indent=4)
logging.info(
"[OpenCopilot] We were able to match and call the API endpoint, the response was: {}".format(
formatted_response
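
Stepping back from the diff, a hedged usage sketch of the reworked handle_request. The payload keys follow the variables used above (text, swagger_url, headers, server_base_url); the URL and token are placeholders:

```
# Hypothetical invocation; all values are invented for illustration.
response = handle_request({
    "text": "Create a new pet and then list all pets",
    "swagger_url": "https://petstore3.swagger.io/api/v3/openapi.json",
    "headers": {"Authorization": "Bearer <token>"},
    "server_base_url": "https://petstore3.swagger.io/api/v3",
})
# If the vector store yields a workflow match, it is loaded from MongoDB;
# otherwise create_workflow_from_operation_ids builds one from the
# predicted operation IDs, and run_workflow executes it.
```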
71 changes: 29 additions & 42 deletions llm-server/routes/workflow/extractors/extract_body.py
@@ -1,11 +1,12 @@
import os
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain.chat_models import ChatOpenAI
from utils.get_llm import get_llm

from typing import Any
from routes.workflow.extractors.extract_json import extract_json_payload
from custom_types.t_json import JsonData
import logging

openai_api_key = os.getenv("OPENAI_API_KEY")
llm = get_llm()
@@ -14,47 +15,33 @@
def gen_body_from_schema(
body_schema: str, text: str, prev_api_response: str, example: str
) -> Any:
_DEFAULT_TEMPLATE = """To enable a substantially intelligent language model to execute a series of APIs sequentially, the following essential details are necessary to gather information needed for the next API call:
1. Initial input when starting the flow: `{text}`
2. Previous API responses: `{prev_api_response}`
3. A JSON response schema that defines the expected format: `{body_schema}`
Try to adhere to this sample api payload as much as possible: ```{example}```
The JSON payload, enclosed within triple backticks on both sides, strictly conforming to the specified "type/format" as outlined in the schema is as follows:
"""

PROMPT = PromptTemplate(
input_variables=[
"text",
"body_schema",
"prev_api_response",
"example",
],
template=_DEFAULT_TEMPLATE,
chat = ChatOpenAI(
openai_api_key=os.getenv("OPENAI_API_KEY"),
model="gpt-3.5-turbo-16k",
temperature=0,
)

PROMPT.format(
prev_api_response=prev_api_response,
body_schema=body_schema,
text=text,
example=example,
messages = [
SystemMessage(
content="You are an intelligent machine learning model that can produce REST API's body in json format, given the json schema, dummy json payload, user input, data from previous api calls."
),
HumanMessage(content="Json Schema: {}".format(body_schema)),
HumanMessage(content="Dummy json payload: {}".format(example)),
HumanMessage(content="User input: {}".format(text)),
HumanMessage(content="prev api responses: {}".format(prev_api_response)),
HumanMessage(
content="Given the provided information, generate the appropriate JSON payload to use as body for the API request"
),
]
result = chat(messages)

logging.info("[OpenCopilot] LLM Body Response: {}".format(result.content))

d: Any = extract_json_payload(result.content)
logging.info(
"[OpenCopilot] Parsed the json payload: {}, context: {}".format(
d, "gen_body_from_schema"
)
)

chain = LLMChain(
llm=llm,
prompt=PROMPT,
# memory=memory,
verbose=True,
)
json_string = chain.run(
{
"text": text,
"body_schema": body_schema,
"prev_api_response": prev_api_response,
"example": example,
}
)

response = extract_json_payload(json_string)

return response
return d
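
An illustrative call to the refactored gen_body_from_schema; the schema, user text, and example payload below are invented for the sketch:

```
# Hypothetical inputs for illustration only.
body = gen_body_from_schema(
    body_schema='{"type": "object", "properties": {"name": {"type": "string"}}}',
    text="Add a pet named Rex",
    prev_api_response="{}",
    example='{"name": "Fluffy"}',
)
# With temperature=0 the model should return something like {"name": "Rex"},
# which extract_json_payload then parses out of the chat response.
```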
74 changes: 29 additions & 45 deletions llm-server/routes/workflow/extractors/extract_param.py
@@ -1,10 +1,11 @@
import os
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from routes.workflow.extractors.extract_json import extract_json_payload
from utils.get_llm import get_llm
from custom_types.t_json import JsonData
from typing import Optional
from typing import Optional, Any
import logging
from langchain.schema import HumanMessage, SystemMessage

openai_api_key = os.getenv("OPENAI_API_KEY")
llm = get_llm()
@@ -13,49 +14,32 @@
def gen_params_from_schema(
param_schema: str, text: str, prev_resp: str
) -> Optional[JsonData]:
"""Extracts API parameters from a schema based on user text and previous response.
Args:
param_schema (str): A snippet of the OpenAPI parameter schema relevant to this operation.
text (str): The original user text query.
prev_resp (str): The previous API response.
Returns:
Optional[JsonData]: The extracted JSON parameters, if successful.
This function constructs a prompt with the given inputs and passes it to
an LLM to generate a JSON string containing the parameters. It then parses
this to extract a JSON payload matching the schema structure.
"""

_DEFAULT_TEMPLATE = """In order to facilitate the sequential execution of a highly intelligent language model with a series of APIs, we furnish the vital information required for executing the next API call.
The initial input at the onset of the process: {text}
The responses obtained from previous API calls: {prev_resp}
A schema for request parameters that defines the expected format: {param_schema}
The JSON payload, which is used to represent the query parameters and is constructed using the initial input and previous API responses, must be enclosed within triple backticks on both sides. It must strictly adhere to the specified "type/format" guidelines laid out in the schema, and the structure is as follows:"""

PROMPT = PromptTemplate(
input_variables=["prev_resp", "text", "param_schema"],
template=_DEFAULT_TEMPLATE,
)

PROMPT.format(
prev_resp=prev_resp,
text=text,
param_schema=param_schema,
chat = ChatOpenAI(
openai_api_key=os.getenv("OPENAI_API_KEY"),
model="gpt-3.5-turbo-16k",
temperature=0,
)

chain = LLMChain(llm=llm, prompt=PROMPT, verbose=True)
json_string = chain.run(
{
"param_schema": param_schema,
"text": text,
"prev_resp": prev_resp,
}
messages = [
SystemMessage(
content="You are an intelligent machine learning model that can produce REST API's params / query params in json format, given the json schema, user input, data from previous api calls."
),
HumanMessage(content="Json Schema: {}".format(param_schema)),
HumanMessage(content="User input: {}".format(text)),
HumanMessage(content="prev api responses: {}".format(prev_resp)),
HumanMessage(
content="Based on the information provided, construct a valid parameter object to be used with python requests library. In cases where user input doesnot contain information for a query, DO NOT add that specific query parameter to the output. "
),
]
result = chat(messages)

logging.info("[OpenCopilot] LLM Body Response: {}".format(result.content))

d: Optional[JsonData] = extract_json_payload(result.content)
logging.info(
"[OpenCopilot] Parsed the json payload: {}, context: {}".format(
d, "gen_body_from_schema"
)
)

response = extract_json_payload(json_string)
print(f"Query params: {response}")
return response
return d
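
extract_json_payload itself is not shown in this diff; a plausible stand-in, assuming it pulls the first JSON object out of a reply that may be wrapped in backticks or prose, might look like this:

```
import json
import re
from typing import Any, Optional


def extract_json_payload_sketch(llm_reply: str) -> Optional[Any]:
    # Grab everything from the first "{" to the last "}" so that
    # surrounding prose or code fences are ignored.
    match = re.search(r"\{.*\}", llm_reply, re.DOTALL)
    if match is None:
        return None
    try:
        return json.loads(match.group(0))
    except json.JSONDecodeError:
        return None
```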
37 changes: 37 additions & 0 deletions llm-server/routes/workflow/extractors/transform_api_response.py
@@ -0,0 +1,37 @@
import os, logging
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv
from langchain.schema import HumanMessage, SystemMessage
from typing import Any
from routes.workflow.extractors.extract_json import extract_json_payload

load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")


def transform_api_response_from_schema(server_url: str, api_response: str) -> str:
chat = ChatOpenAI(
openai_api_key=os.getenv("OPENAI_API_KEY"),
model="gpt-3.5-turbo-16k",
temperature=0,
)

messages = [
SystemMessage(
content="You are a bot capable of comprehending API responses."
),
HumanMessage(
content="Here is the response from current REST API: {} for endpoint: {}".format(
api_response, server_url
)
),
HumanMessage(
content="Analyze the provided API responses and extract only the essential fields required for subsequent API interactions. Disregard any non-essential attributes such as CSS or color-related data. If there are generic fields like 'id,' provide them with more descriptive names in your response. Format your response as a JSON object with clear and meaningful keys that map to their respective values from the API response."
),
]

result = chat(messages)
logging.info("[OpenCopilot] Transformed Response: {}".format(result.content))

return result.content
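
A hedged usage example for the new transformer; the endpoint and response below are invented, and the exact shape of the trimmed output depends on the model:

```
raw = '{"id": 42, "name": "Rex", "css_class": "card--green"}'
slim = transform_api_response_from_schema(
    server_url="https://petstore3.swagger.io/api/v3/pet/42",  # placeholder
    api_response=raw,
)
# The prompt asks the model to drop presentation-only fields (css_class)
# and to rename generic ones, e.g. {"pet_id": 42, "pet_name": "Rex"}.
```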