From 5ffe0df8417e070e0655f4b4d3244e0e9bc09ad8 Mon Sep 17 00:00:00 2001
From: Yuhong Sun
Date: Sat, 23 Nov 2024 14:31:08 -0800
Subject: [PATCH] Remove weak LLM prompt option and unused query validation
 endpoints

---
 backend/danswer/configs/chat_configs.py      |  5 ---
 .../llm/answering/prompts/quotes_prompt.py   | 38 +------------------
 backend/danswer/one_shot_answer/models.py    |  4 --
 backend/danswer/prompts/direct_qa_prompts.py | 12 ------
 .../secondary_llm_flows/query_validation.py  |  6 +--
 .../danswer/server/query_and_chat/models.py  |  5 +--
 .../server/query_and_chat/query_backend.py   | 31 ---------------
 7 files changed, 5 insertions(+), 96 deletions(-)

diff --git a/backend/danswer/configs/chat_configs.py b/backend/danswer/configs/chat_configs.py
index a72baacf686..2d72bed0f5a 100644
--- a/backend/danswer/configs/chat_configs.py
+++ b/backend/danswer/configs/chat_configs.py
@@ -17,9 +17,6 @@
 # ~3k input, half for docs, half for chat history + prompts
 CHAT_TARGET_CHUNK_PERCENTAGE = 512 * 3 / 3072
 
-# For selecting a different LLM question-answering prompt format
-# Valid values: default, cot, weak
-QA_PROMPT_OVERRIDE = os.environ.get("QA_PROMPT_OVERRIDE") or None
 # 1 / (1 + DOC_TIME_DECAY * doc-age-in-years), set to 0 to have no decay
 # Capped in Vespa at 0.5
 DOC_TIME_DECAY = float(
@@ -27,8 +24,6 @@
 )
 BASE_RECENCY_DECAY = 0.5
 FAVOR_RECENT_DECAY_MULTIPLIER = 2.0
-# Currently this next one is not configurable via env
-DISABLE_LLM_QUERY_ANSWERABILITY = QA_PROMPT_OVERRIDE == "weak"
 # For the highest matching base size chunk, how many chunks above and below do we pull in by default
 # Note this is not in any of the deployment configs yet
 # Currently only applies to search flow not chat
diff --git a/backend/danswer/llm/answering/prompts/quotes_prompt.py b/backend/danswer/llm/answering/prompts/quotes_prompt.py
index 3cdaaefcfdb..00f22f9e7df 100644
--- a/backend/danswer/llm/answering/prompts/quotes_prompt.py
+++ b/backend/danswer/llm/answering/prompts/quotes_prompt.py
@@ -2,7 +2,6 @@
 
 from danswer.chat.models import LlmDoc
 from danswer.configs.chat_configs import LANGUAGE_HINT
-from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE
 from danswer.context.search.models import InferenceChunk
 from danswer.db.search_settings import get_multilingual_expansion
 from danswer.llm.answering.models import PromptConfig
@@ -10,39 +9,10 @@
 from danswer.prompts.direct_qa_prompts import CONTEXT_BLOCK
 from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK
 from danswer.prompts.direct_qa_prompts import JSON_PROMPT
-from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT
 from danswer.prompts.prompt_utils import add_date_time_to_prompt
 from danswer.prompts.prompt_utils import build_complete_context_str
 
 
-def _build_weak_llm_quotes_prompt(
-    question: str,
-    context_docs: list[LlmDoc] | list[InferenceChunk],
-    history_str: str,
-    prompt: PromptConfig,
-) -> HumanMessage:
-    """Since Danswer supports a variety of LLMs, this less demanding prompt is provided
-    as an option to use with weaker LLMs such as small version, low float precision, quantized,
-    or distilled models. It only uses one context document and has very weak requirements of
-    output format.
-    """
-    context_block = ""
-    if context_docs:
-        context_block = CONTEXT_BLOCK.format(context_docs_str=context_docs[0].content)
-
-    prompt_str = WEAK_LLM_PROMPT.format(
-        system_prompt=prompt.system_prompt,
-        context_block=context_block,
-        task_prompt=prompt.task_prompt,
-        user_query=question,
-    )
-
-    if prompt.datetime_aware:
-        prompt_str = add_date_time_to_prompt(prompt_str=prompt_str)
-
-    return HumanMessage(content=prompt_str)
-
-
 def _build_strong_llm_quotes_prompt(
     question: str,
     context_docs: list[LlmDoc] | list[InferenceChunk],
@@ -81,15 +51,9 @@ def build_quotes_user_message(
     history_str: str,
     prompt: PromptConfig,
 ) -> HumanMessage:
-    prompt_builder = (
-        _build_weak_llm_quotes_prompt
-        if QA_PROMPT_OVERRIDE == "weak"
-        else _build_strong_llm_quotes_prompt
-    )
-
     query, _ = message_to_prompt_and_imgs(message)
 
-    return prompt_builder(
+    return _build_strong_llm_quotes_prompt(
         question=query,
         context_docs=context_docs,
         history_str=history_str,
diff --git a/backend/danswer/one_shot_answer/models.py b/backend/danswer/one_shot_answer/models.py
index 21463867d28..630c7b5cab4 100644
--- a/backend/danswer/one_shot_answer/models.py
+++ b/backend/danswer/one_shot_answer/models.py
@@ -36,10 +36,6 @@ class PromptConfig(BaseModel):
     datetime_aware: bool = True
 
 
-class DocumentSetConfig(BaseModel):
-    id: int
-
-
 class ToolConfig(BaseModel):
     id: int
 
diff --git a/backend/danswer/prompts/direct_qa_prompts.py b/backend/danswer/prompts/direct_qa_prompts.py
index b1229b896a7..b00cfcebf15 100644
--- a/backend/danswer/prompts/direct_qa_prompts.py
+++ b/backend/danswer/prompts/direct_qa_prompts.py
@@ -118,18 +118,6 @@
 """
 
 
-# For weak LLM which only takes one chunk and cannot output json
-# Also not requiring quotes as it tends to not work
-WEAK_LLM_PROMPT = f"""
-{{system_prompt}}
-{{context_block}}
-{{task_prompt}}
-
-{QUESTION_PAT.upper()}
-{{user_query}}
-""".strip()
-
-
 # This is only for visualization for the users to specify their own prompts
 # The actual flow does not work like this
 PARAMATERIZED_PROMPT = f"""
diff --git a/backend/danswer/secondary_llm_flows/query_validation.py b/backend/danswer/secondary_llm_flows/query_validation.py
index 2ee428f0090..d11e603715e 100644
--- a/backend/danswer/secondary_llm_flows/query_validation.py
+++ b/backend/danswer/secondary_llm_flows/query_validation.py
@@ -1,9 +1,9 @@
+# NOTE No longer used. This needs to be revisited later.
 import re
 from collections.abc import Iterator
 
 from danswer.chat.models import DanswerAnswerPiece
 from danswer.chat.models import StreamingError
-from danswer.configs.chat_configs import DISABLE_LLM_QUERY_ANSWERABILITY
 from danswer.llm.exceptions import GenAIDisabledException
 from danswer.llm.factory import get_default_llms
 from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
@@ -46,7 +46,7 @@ def extract_answerability_bool(model_raw: str) -> bool:
 
 
 def get_query_answerability(
-    user_query: str, skip_check: bool = DISABLE_LLM_QUERY_ANSWERABILITY
+    user_query: str, skip_check: bool = False
 ) -> tuple[str, bool]:
     if skip_check:
         return "Query Answerability Evaluation feature is turned off", True
@@ -67,7 +67,7 @@
 
 
 def stream_query_answerability(
-    user_query: str, skip_check: bool = DISABLE_LLM_QUERY_ANSWERABILITY
+    user_query: str, skip_check: bool = False
 ) -> Iterator[str]:
     if skip_check:
         yield get_json_line(
diff --git a/backend/danswer/server/query_and_chat/models.py b/backend/danswer/server/query_and_chat/models.py
index c316435996e..ae6e651fff1 100644
--- a/backend/danswer/server/query_and_chat/models.py
+++ b/backend/danswer/server/query_and_chat/models.py
@@ -29,10 +29,6 @@ class TagResponse(BaseModel):
     tags: list[SourceTag]
 
 
-class SimpleQueryRequest(BaseModel):
-    query: str
-
-
 class UpdateChatSessionThreadRequest(BaseModel):
     # If not specified, use Danswer default persona
     chat_session_id: UUID
@@ -217,6 +213,7 @@ class ChatSessionDetailResponse(BaseModel):
     current_alternate_model: str | None
 
 
+# NOTE No longer used.
 class QueryValidationResponse(BaseModel):
     reasoning: str
     answerable: bool
diff --git a/backend/danswer/server/query_and_chat/query_backend.py b/backend/danswer/server/query_and_chat/query_backend.py
index 6fb848dfa38..f07d98f0aa9 100644
--- a/backend/danswer/server/query_and_chat/query_backend.py
+++ b/backend/danswer/server/query_and_chat/query_backend.py
@@ -34,15 +34,11 @@
 from danswer.document_index.vespa.index import VespaIndex
 from danswer.one_shot_answer.answer_question import stream_search_answer
 from danswer.one_shot_answer.models import DirectQARequest
-from danswer.secondary_llm_flows.query_validation import get_query_answerability
-from danswer.secondary_llm_flows.query_validation import stream_query_answerability
 from danswer.server.query_and_chat.models import AdminSearchRequest
 from danswer.server.query_and_chat.models import AdminSearchResponse
 from danswer.server.query_and_chat.models import ChatSessionDetails
 from danswer.server.query_and_chat.models import ChatSessionsResponse
-from danswer.server.query_and_chat.models import QueryValidationResponse
 from danswer.server.query_and_chat.models import SearchSessionDetailResponse
-from danswer.server.query_and_chat.models import SimpleQueryRequest
 from danswer.server.query_and_chat.models import SourceTag
 from danswer.server.query_and_chat.models import TagResponse
 from danswer.server.query_and_chat.token_limit import check_token_rate_limits
@@ -135,18 +131,6 @@ def get_tags(
     return TagResponse(tags=server_tags)
 
 
-@basic_router.post("/query-validation")
-def query_validation(
-    simple_query: SimpleQueryRequest, _: User = Depends(current_user)
-) -> QueryValidationResponse:
-    # Note if weak model prompt is chosen, this check does not occur and will simply return that
-    # the query is valid, this is because weaker models cannot really handle this task well.
-    # Additionally, some weak model servers cannot handle concurrent inferences.
-    logger.notice(f"Validating query: {simple_query.query}")
-    reasoning, answerable = get_query_answerability(simple_query.query)
-    return QueryValidationResponse(reasoning=reasoning, answerable=answerable)
-
-
 @basic_router.get("/user-searches")
 def get_user_search_sessions(
     user: User | None = Depends(current_user),
@@ -247,21 +231,6 @@ def get_search_session(
     return response
 
 
-# NOTE No longer used, after search/chat redesign.
-# No search responses are answered with a conversational generative AI response
-@basic_router.post("/stream-query-validation")
-def stream_query_validation(
-    simple_query: SimpleQueryRequest, _: User = Depends(current_user)
-) -> StreamingResponse:
-    # Note if weak model prompt is chosen, this check does not occur and will simply return that
-    # the query is valid, this is because weaker models cannot really handle this task well.
-    # Additionally, some weak model servers cannot handle concurrent inferences.
-    logger.notice(f"Validating query: {simple_query.query}")
-    return StreamingResponse(
-        stream_query_answerability(simple_query.query), media_type="application/json"
-    )
-
-
 @basic_router.post("/stream-answer-with-quote")
 def get_answer_with_quote(
     query_request: DirectQARequest,