This repository has been archived by the owner on Jan 5, 2025. It is now read-only.

Commit

Merge pull request #113 from openchatai/hotfix/main
raising a hotfix for main
codebanesr authored Oct 3, 2023
2 parents fc9c411 + 6ebfb0b commit 21555dc
Showing 5 changed files with 200 additions and 120 deletions.
Binary file added .DS_Store
55 changes: 55 additions & 0 deletions llm-server/notebooks/unit_test.ipynb
@@ -0,0 +1,55 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\"{\\\\n \\'ids\\': [\\\\n \\'createBoard\\'\\\\n ],\\\\n \\'bot_message\\': \\'To create a Trello board with the name \\'trello_test\\', you can use the \\'createBoard\\' API call.\\'\\\\n}\\\\\"\\\\n\"'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import json\n",
"json.dumps(\"\"\"{\\n 'ids': [\\n 'createBoard'\\n ],\\n 'bot_message': 'To create a Trello board with the name 'trello_test', you can use the 'createBoard' API call.'\\n}\"\n",
"\"\"\", separators=(\",\", \":\"))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
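
The cell above serializes the model's single-quoted reply with json.dumps, which only escapes it into a JSON string literal (the separators argument has no effect on a plain str) and does not validate it. A minimal illustration with a shortened payload (the payload text here is simplified, not taken verbatim from the cell):

import json

# Single quotes make this invalid JSON on its own.
raw_reply = "{\n 'ids': [\n  'createBoard'\n ],\n 'bot_message': 'Use the createBoard API call.'\n}"

# json.dumps on a str merely escapes it; it does not parse or normalise anything.
escaped = json.dumps(raw_reply, separators=(",", ":"))
print(escaped)

# Parsing the raw reply directly fails, which is what the extract_json_payload
# helper changed later in this commit is meant to work around.
try:
    json.loads(raw_reply)
except json.JSONDecodeError as exc:
    print("not valid JSON on its own:", exc)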
100 changes: 80 additions & 20 deletions llm-server/routes/root_service.py
@@ -14,9 +14,16 @@
from prompts.base import api_base_prompt, non_api_base_prompt
from routes.workflow.typings.run_workflow_input import WorkflowData
from routes.workflow.workflow_service import run_workflow
from utils.base import try_to_match_and_call_api_endpoint
from utils.detect_multiple_intents import hasSingleIntent
import os
from dotenv import load_dotenv
from typing import Dict, Any, cast
from utils.db import Database
from utils.detect_multiple_intents import hasSingleIntent
import json
import yaml
from yaml.parser import ParserError
from api_caller.base import try_to_match_and_call_api_endpoint

db_instance = Database()
mongo = db_instance.get_db()
@@ -37,13 +44,42 @@ def fetch_swagger_text(swagger_url: str) -> str:
if swagger_url.startswith("https://"):
response = requests.get(swagger_url)
if response.status_code == 200:
return response.text
raise Exception(FAILED_TO_FETCH_SWAGGER_CONTENT)
try:
# Try parsing the content as JSON
json_content = json.loads(response.text)
return json.dumps(json_content, indent=2)
except json.JSONDecodeError:
try:
# Try parsing the content as YAML
yaml_content = yaml.safe_load(response.text)
if isinstance(yaml_content, dict):
return json.dumps(yaml_content, indent=2)
else:
raise Exception("Invalid YAML content")
except ParserError:
raise Exception("Failed to parse content as JSON or YAML")

raise Exception("Failed to fetch Swagger content")

try:
with open(shared_folder + swagger_url, "r") as file:
return file.read()
content = file.read()
try:
# Try parsing the content as JSON
json_content = json.loads(content)
return json.dumps(json_content, indent=2)
except json.JSONDecodeError:
try:
# Try parsing the content as YAML
yaml_content = yaml.safe_load(content)
if isinstance(yaml_content, dict):
return json.dumps(yaml_content, indent=2)
else:
raise Exception("Invalid YAML content")
except ParserError:
raise Exception("Failed to parse content as JSON or YAML")
except FileNotFoundError:
raise Exception(FILE_NOT_FOUND)
raise Exception("File not found")


def handle_request(data: Dict[str, Any]) -> Any:
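
The same JSON-first, YAML-fallback parse now appears twice in fetch_swagger_text, once for remote specs and once for local files. A minimal standalone sketch of that pattern, assuming PyYAML is installed; the helper name normalize_spec_text is illustrative and not part of the commit:

import json
import yaml
from yaml.parser import ParserError


def normalize_spec_text(raw: str) -> str:
    """Return the spec as pretty-printed JSON, accepting JSON or YAML input."""
    try:
        # Fast path: the content is already JSON.
        return json.dumps(json.loads(raw), indent=2)
    except json.JSONDecodeError:
        try:
            parsed = yaml.safe_load(raw)
        except ParserError:
            raise Exception("Failed to parse content as JSON or YAML")
        if isinstance(parsed, dict):
            return json.dumps(parsed, indent=2)
        raise Exception("Invalid YAML content")

Two hedged observations on the design: YAML 1.2 is essentially a superset of JSON and yaml.safe_load accepts most JSON documents, so the YAML branch alone would usually cover both cases; and yaml.safe_load can also raise yaml.scanner.ScannerError on some malformed input, which an except ParserError clause would not catch.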
@@ -68,36 +104,60 @@ def handle_request(data: Dict[str, Any]) -> Any:
) or json.loads(fetch_swagger_text(swagger_url))

try:
logging.info("[OpenCopilot] Trying to figure out if the user request require 1) APIs calls 2) If yes how many "
"of them")
k = hasSingleIntent(swagger_doc, text)
if k is False:
logging.warning("[OpenCopilot] Apparently, the user request require calling more than single API endpoint "
"to get the job done")
logging.info(
"[OpenCopilot] Trying to figure out if the user request require 1) APIs calls 2) If yes how many "
"of them"
)
bot_response = hasSingleIntent(swagger_doc, text)
if len(bot_response.ids) > 1:
logging.warning(
"[OpenCopilot] Apparently, the user request require calling more than single API endpoint "
"to get the job done"
)
return run_workflow(
WorkflowData(text, headers, server_base_url, swagger_url), swagger_doc
)
elif k is True:
elif len(bot_response.ids) == 0:
logging.info("[OpenCopilot] The user request doesnot require an api call")
return {"response": bot_response.bot_message}

else:
logging.info(
"[OpenCopilot] The user request can be handled in single API call")
"[OpenCopilot] The user request can be handled in single API call"
)
raise "Falling back to planner"
# else:
# return {"": k}
except Exception as e:
logging.info("[OpenCopilot] Something went wrong when try to get how many calls is required")
logging.info(
"[OpenCopilot] Something went wrong when try to get how many calls is required"
)

logging.info(
"[OpenCopilot] The user request will be handled by single API call or otherwise a normal text response")
"[OpenCopilot] The user request will be handled by single API call or otherwise a normal text response"
)

swagger_spec = OpenAPISpec.from_text(fetch_swagger_text(swagger_url))

try:
logging.info("[OpenCopilot] Trying to match the request to a single API endpoint")
logging.info(
"[OpenCopilot] Trying to match the request to a single API endpoint"
)
json_output = try_to_match_and_call_api_endpoint(swagger_spec, text, headers)

formatted_response = json.dumps(json_output, indent=4) # Indent the JSON with 4 spaces
formatted_response = json.dumps(
json_output, indent=4
) # Indent the JSON with 4 spaces
logging.info(
"[OpenCopilot] We were able to match and call the API endpoint, the response was: {}".format(json_output))
"[OpenCopilot] We were able to match and call the API endpoint, the response was: {}".format(
formatted_response
)
)
except Exception as e:
logging.info("[OpenCopilot] Failed to call the single API endpoint - so we will fallback to normal text "
"response")
logging.info(
"[OpenCopilot] Failed to call the single API endpoint - so we will fallback to normal text "
"response"
)
logging.error(f"{FAILED_TO_CALL_API_ENDPOINT}: {str(e)}")
logging.error("Exception traceback:\n" + traceback.format_exc())
json_output = None
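
With this change hasSingleIntent returns a BotMessage rather than a bool, and handle_request branches on how many operation ids it carries. A condensed, illustrative sketch of that dispatch (the function and callable names below are not part of the commit). Note that the raise "Falling back to planner" in the single-id branch is itself caught by the enclosing except, so execution drops through to the single-endpoint matching code after the try block:

def dispatch_on_intent(bot_response, run_planner, run_single_call):
    """Illustrative only: route a BotMessage the way the new handle_request does."""
    if len(bot_response.ids) > 1:
        # Several operation ids: hand the request to the multi-step workflow.
        return run_planner()
    if len(bot_response.ids) == 0:
        # No API call needed: answer with the model's own text.
        return {"response": bot_response.bot_message}
    # Exactly one id: fall through to the single-endpoint matching path.
    return run_single_call()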
15 changes: 7 additions & 8 deletions llm-server/routes/workflow/extractors/extract_json.py
@@ -1,18 +1,17 @@
import re
import json
from typing import Optional, Union
from typing import Optional, Dict, List, Union

JsonData = Union[dict, list]


def extract_json_payload(input_string: str) -> Optional[JsonData]:
# Remove all whitespace characters
input_string = re.sub(r"\s", "", input_string)
def extract_json_payload(
input_string: str,
) -> Optional[
Union[Dict[str, List[Union[str, Dict[str, Union[str, int]]]]], List[str]]
]:

# Replace single quotes with double quotes
input_string = re.sub(r"'", '"', input_string)

match = re.findall(r"{.+[:,].+}|\[.+[,:].+\]", input_string)
match = re.findall(r"{[^{}]*}|\[[^\[\]]*\]", input_string)

try:
return json.loads(match[0]) if match else None
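
The regex here changes from the greedy {.+[:,].+} to the non-nesting {[^{}]*}|\[[^\[\]]*\], and the whitespace-stripping step is gone, so spacing inside string values is preserved. A hedged sketch that mirrors the updated logic inline (for illustration only) against a typical model reply; because [^{}]* cannot cross a nested brace, a payload containing a nested object would match only its innermost pair, but the flat ids/bot_message shape used elsewhere in this commit is unaffected:

import json
import re

# Typical model reply: JSON wrapped in prose and backticks, single-quoted.
reply = "Sure thing! ```{'ids': ['createBoard'], 'bot_message': 'Creating the board now'}```"

normalized = re.sub(r"'", '"', reply)                     # single -> double quotes (unchanged step)
match = re.findall(r"{[^{}]*}|\[[^\[\]]*\]", normalized)  # new non-nesting pattern
payload = json.loads(match[0]) if match else None
print(payload)  # {'ids': ['createBoard'], 'bot_message': 'Creating the board now'}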
150 changes: 58 additions & 92 deletions llm-server/utils/detect_multiple_intents.py
@@ -1,49 +1,33 @@
import json
import logging
import re
from typing import Any, Dict, Optional
from typing import Any, Dict, Optional, Union, cast
from typing import List

from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from utils.get_llm import get_llm


# use spaCy or BERT for more accurate results
def hasMultipleIntents(user_input: str) -> bool:
# Keywords for multiple questions
question_keywords = [
"and",
"also",
"in addition",
"moreover",
"furthermore",
"besides",
"additionally",
"another question",
"second question",
"next, ask",
"thirdly",
"finally",
"lastly",
]

# Check for question keywords
question_pattern = "|".join(re.escape(keyword) for keyword in question_keywords)
question_matches = [
match.group()
for match in re.finditer(question_pattern, user_input, re.IGNORECASE)
]
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain.chat_models import ChatOpenAI
from routes.workflow.extractors.extract_json import extract_json_payload
import os
from dotenv import load_dotenv
import logging

logging.basicConfig(level=logging.DEBUG)

print(f"Found {question_matches} in the following input: {user_input}")
return bool(question_matches)
load_dotenv()


# user_input = (
# "I want to fetch data from API A and also, can you answer another question?"
# )
# result = hasMultipleIntents(user_input)
# print(json.dumps(result, indent=2))
class BotMessage:
def __init__(self, ids: List[str], bot_message: str):
self.ids = ids
self.bot_message = bot_message

def to_dict(self) -> Dict[str, Union[str, List[str]]]:
return {"ids": self.ids, "bot_message": self.bot_message}

@classmethod
def from_dict(cls, data: Dict[str, Union[str, List[str]]]) -> "BotMessage":
return cls(cast(List[str], data["ids"]), cast(str, data["bot_message"]))


def getSummaries(swagger_doc: Any):
@@ -67,63 +51,45 @@ def getSummaries(swagger_doc: Any):
return summaries


def hasSingleIntent(swagger_doc: Any, user_requirement: str) -> bool:
# todo use create_structured_output_chain with validation
def hasSingleIntent(swagger_doc: Any, user_requirement: str) -> BotMessage:
summaries = getSummaries(swagger_doc)
_DEFAULT_TEMPLATE = """You are an AI chatbot that determines the sequence of API calls needed to perform an action. You only provide the user with the list of API calls. You have been given a summary of the APIs that a third party system allows access to. However, users may also ask general questions that do not require API calls.
When given:
- A list of API summaries `{summaries}`
- The user's desired action `{user_requirement}`

Respond with the following JSON structure:
{{
"ids": [
"list",
"of",
"operation",
"ids"
],
"bot_message": "Bot reasoning here"
}}
IT'S EXTREMELY IMPORTANT TO ONLY RETURN THE OPERATION IDS REQUIRE TO GET THE JOB DONE, NEVER ADD THINGS THAT IS NOT REQUIRED.
Only return the JSON structure, no additional text or formatting, just JSON.
"""
llm = get_llm()
PROMPT = PromptTemplate(
input_variables=["summaries", "user_requirement"],
template=_DEFAULT_TEMPLATE,
chat = ChatOpenAI(
openai_api_key=os.getenv("OPENAI_API_KEY"),
model="gpt-3.5-turbo-16k",
temperature=0,
)
messages = [
SystemMessage(
content="You serve as an AI co-pilot tasked with identifying the correct sequence of API calls necessary to execute a user's action. It is essential that you consistently provide a valid JSON payload (use double quotes) in your responses. If the user's input is a `question` and does not involve initiating any actions or require API calls, please respond appropriately in the `bot_message` section of the response while leaving the `ids` field empty ([]). If the user is asking you to perform a `CRUD` operation, provide the list of operation ids of api calls needed in the `ids` field of the json"
),
HumanMessage(
content="Here's a list of api summaries {}".format(summaries),
),
HumanMessage(content="{}".format(user_requirement)),
HumanMessage(
content="""Reply in the following json format ```{
"ids": [
"list",
"of",
"operation",
"ids"
],
"bot_message": "Bot response here"
}```"""
),
]

PROMPT.format(user_requirement=user_requirement, summaries=summaries)

chain = LLMChain(
llm=llm,
prompt=PROMPT,
# memory=memory,
verbose=True,
result = chat(messages)
logging.info(
"[OpenCopilot] Extracted the needed steps to get the job done: {}".format(
result.content
)
)
d: Any = extract_json_payload(result.content)
logging.info(
"[OpenCopilot] Parsed the json payload: {}, context: {}".format(
d, "hasSingleIntent"
)
)
response = json.loads(chain.run(
{
"summaries": summaries,
"user_requirement": user_requirement,
}
))

formatted_response = json.dumps(response, indent=4) # Indent the JSON with 4 spaces

logging.info("[OpenCopilot] Extracted the needed steps to get the job done: {}".format(formatted_response))

if len(response["ids"]) == 1:
logging.info("[OpenCopilot] The user request can be done in a single API")
return True
elif len(response["ids"]) > 1:
logging.info("[OpenCopilot] The user request require multiple API calls to be done")
return False
else:
return response["bot_message"]
return BotMessage.from_dict(d)
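
hasSingleIntent now calls ChatOpenAI directly and parses the reply into a BotMessage instead of returning a bool, so callers inspect ids themselves. A hedged usage sketch, assuming it is run from the llm-server package root; the live call requires a swagger document and an OPENAI_API_KEY in the environment, and nothing below is part of the commit itself:

from utils.detect_multiple_intents import BotMessage, hasSingleIntent

# Offline check of the new return type: build a BotMessage the way the
# function does, from the JSON payload extracted out of the model reply.
parsed = {"ids": ["createBoard"], "bot_message": "One call to createBoard is enough."}
bot_message = BotMessage.from_dict(parsed)
assert bot_message.ids == ["createBoard"]

# With a swagger_doc and an OPENAI_API_KEY available, callers now branch on
# the number of operation ids instead of on a boolean:
# bot_response = hasSingleIntent(swagger_doc, "Create a Trello board called trello_test")
# if len(bot_response.ids) == 0:
#     print(bot_response.bot_message)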
