
Commit

yuhongsun96 committed Nov 23, 2024
1 parent 62a4aa1 commit 5ffe0df
Showing 7 changed files with 5 additions and 96 deletions.
5 changes: 0 additions & 5 deletions backend/danswer/configs/chat_configs.py
@@ -17,18 +17,13 @@
# ~3k input, half for docs, half for chat history + prompts
CHAT_TARGET_CHUNK_PERCENTAGE = 512 * 3 / 3072

# For selecting a different LLM question-answering prompt format
# Valid values: default, cot, weak
QA_PROMPT_OVERRIDE = os.environ.get("QA_PROMPT_OVERRIDE") or None
# 1 / (1 + DOC_TIME_DECAY * doc-age-in-years), set to 0 to have no decay
# Capped in Vespa at 0.5
DOC_TIME_DECAY = float(
os.environ.get("DOC_TIME_DECAY") or 0.5 # Hits limit at 2 years by default
)
BASE_RECENCY_DECAY = 0.5
FAVOR_RECENT_DECAY_MULTIPLIER = 2.0
# Currently this next one is not configurable via env
DISABLE_LLM_QUERY_ANSWERABILITY = QA_PROMPT_OVERRIDE == "weak"
# For the highest matching base size chunk, how many chunks above and below do we pull in by default
# Note this is not in any of the deployment configs yet
# Currently only applies to search flow not chat
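The two decay comments above describe a simple multiplier. A minimal sketch of its behavior, written as an illustration only and treating the 0.5 cap enforced in Vespa as a floor on the resulting multiplier (an interpretation, not something this diff states explicitly):

# Illustrative sketch of the recency-decay comments above; not code from the repository.
def recency_multiplier(doc_age_years: float, decay: float = 0.5) -> float:
    raw = 1 / (1 + decay * doc_age_years)  # 1 / (1 + DOC_TIME_DECAY * doc-age-in-years)
    return max(raw, 0.5)  # "Capped in Vespa at 0.5" read as a floor on the multiplier

# With the default decay of 0.5, the floor is reached at two years, matching the
# "Hits limit at 2 years by default" comment:
#   recency_multiplier(0) == 1.0, recency_multiplier(1) ~= 0.67, recency_multiplier(2) == 0.5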
38 changes: 1 addition & 37 deletions backend/danswer/llm/answering/prompts/quotes_prompt.py
@@ -2,47 +2,17 @@

from danswer.chat.models import LlmDoc
from danswer.configs.chat_configs import LANGUAGE_HINT
from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE
from danswer.context.search.models import InferenceChunk
from danswer.db.search_settings import get_multilingual_expansion
from danswer.llm.answering.models import PromptConfig
from danswer.llm.utils import message_to_prompt_and_imgs
from danswer.prompts.direct_qa_prompts import CONTEXT_BLOCK
from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK
from danswer.prompts.direct_qa_prompts import JSON_PROMPT
from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT
from danswer.prompts.prompt_utils import add_date_time_to_prompt
from danswer.prompts.prompt_utils import build_complete_context_str


def _build_weak_llm_quotes_prompt(
question: str,
context_docs: list[LlmDoc] | list[InferenceChunk],
history_str: str,
prompt: PromptConfig,
) -> HumanMessage:
"""Since Danswer supports a variety of LLMs, this less demanding prompt is provided
as an option to use with weaker LLMs such as small version, low float precision, quantized,
or distilled models. It only uses one context document and has very weak requirements of
output format.
"""
context_block = ""
if context_docs:
context_block = CONTEXT_BLOCK.format(context_docs_str=context_docs[0].content)

prompt_str = WEAK_LLM_PROMPT.format(
system_prompt=prompt.system_prompt,
context_block=context_block,
task_prompt=prompt.task_prompt,
user_query=question,
)

if prompt.datetime_aware:
prompt_str = add_date_time_to_prompt(prompt_str=prompt_str)

return HumanMessage(content=prompt_str)


def _build_strong_llm_quotes_prompt(
question: str,
context_docs: list[LlmDoc] | list[InferenceChunk],
@@ -81,15 +51,9 @@ def build_quotes_user_message(
history_str: str,
prompt: PromptConfig,
) -> HumanMessage:
prompt_builder = (
_build_weak_llm_quotes_prompt
if QA_PROMPT_OVERRIDE == "weak"
else _build_strong_llm_quotes_prompt
)

query, _ = message_to_prompt_and_imgs(message)

return prompt_builder(
return _build_strong_llm_quotes_prompt(
question=query,
context_docs=context_docs,
history_str=history_str,
4 changes: 0 additions & 4 deletions backend/danswer/one_shot_answer/models.py
@@ -36,10 +36,6 @@ class PromptConfig(BaseModel):
datetime_aware: bool = True


class DocumentSetConfig(BaseModel):
id: int


class ToolConfig(BaseModel):
id: int

12 changes: 0 additions & 12 deletions backend/danswer/prompts/direct_qa_prompts.py
@@ -118,18 +118,6 @@
"""


# For weak LLM which only takes one chunk and cannot output json
# Also not requiring quotes as it tends to not work
WEAK_LLM_PROMPT = f"""
{{system_prompt}}
{{context_block}}
{{task_prompt}}
{QUESTION_PAT.upper()}
{{user_query}}
""".strip()


# This is only for visualization for the users to specify their own prompts
# The actual flow does not work like this
PARAMATERIZED_PROMPT = f"""
6 changes: 3 additions & 3 deletions backend/danswer/secondary_llm_flows/query_validation.py
@@ -1,9 +1,9 @@
# NOTE No longer used. This needs to be revisited later.
import re
from collections.abc import Iterator

from danswer.chat.models import DanswerAnswerPiece
from danswer.chat.models import StreamingError
from danswer.configs.chat_configs import DISABLE_LLM_QUERY_ANSWERABILITY
from danswer.llm.exceptions import GenAIDisabledException
from danswer.llm.factory import get_default_llms
from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
@@ -46,7 +46,7 @@ def extract_answerability_bool(model_raw: str) -> bool:


def get_query_answerability(
user_query: str, skip_check: bool = DISABLE_LLM_QUERY_ANSWERABILITY
user_query: str, skip_check: bool = False
) -> tuple[str, bool]:
if skip_check:
return "Query Answerability Evaluation feature is turned off", True
@@ -67,7 +67,7 @@ def get_query_answerability(


def stream_query_answerability(
user_query: str, skip_check: bool = DISABLE_LLM_QUERY_ANSWERABILITY
user_query: str, skip_check: bool = False
) -> Iterator[str]:
if skip_check:
yield get_json_line(
5 changes: 1 addition & 4 deletions backend/danswer/server/query_and_chat/models.py
@@ -29,10 +29,6 @@ class TagResponse(BaseModel):
tags: list[SourceTag]


class SimpleQueryRequest(BaseModel):
query: str


class UpdateChatSessionThreadRequest(BaseModel):
# If not specified, use Danswer default persona
chat_session_id: UUID
@@ -217,6 +213,7 @@ class ChatSessionDetailResponse(BaseModel):
current_alternate_model: str | None


# This one is not used anymore
class QueryValidationResponse(BaseModel):
reasoning: str
answerable: bool
31 changes: 0 additions & 31 deletions backend/danswer/server/query_and_chat/query_backend.py
@@ -34,15 +34,11 @@
from danswer.document_index.vespa.index import VespaIndex
from danswer.one_shot_answer.answer_question import stream_search_answer
from danswer.one_shot_answer.models import DirectQARequest
from danswer.secondary_llm_flows.query_validation import get_query_answerability
from danswer.secondary_llm_flows.query_validation import stream_query_answerability
from danswer.server.query_and_chat.models import AdminSearchRequest
from danswer.server.query_and_chat.models import AdminSearchResponse
from danswer.server.query_and_chat.models import ChatSessionDetails
from danswer.server.query_and_chat.models import ChatSessionsResponse
from danswer.server.query_and_chat.models import QueryValidationResponse
from danswer.server.query_and_chat.models import SearchSessionDetailResponse
from danswer.server.query_and_chat.models import SimpleQueryRequest
from danswer.server.query_and_chat.models import SourceTag
from danswer.server.query_and_chat.models import TagResponse
from danswer.server.query_and_chat.token_limit import check_token_rate_limits
@@ -135,18 +131,6 @@ def get_tags(
return TagResponse(tags=server_tags)


@basic_router.post("/query-validation")
def query_validation(
simple_query: SimpleQueryRequest, _: User = Depends(current_user)
) -> QueryValidationResponse:
# Note if weak model prompt is chosen, this check does not occur and will simply return that
# the query is valid, this is because weaker models cannot really handle this task well.
# Additionally, some weak model servers cannot handle concurrent inferences.
logger.notice(f"Validating query: {simple_query.query}")
reasoning, answerable = get_query_answerability(simple_query.query)
return QueryValidationResponse(reasoning=reasoning, answerable=answerable)
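For reference, a minimal sketch of how a client might have called the endpoint deleted here. The "/query" router prefix, host, and port are assumptions, not confirmed by this diff, and authentication is elided.

# Illustrative client call for the removed /query-validation endpoint; URL details are assumed.
import requests

resp = requests.post(
    "http://localhost:8080/query/query-validation",  # assumed host, port, and router prefix
    json={"query": "What changed in the last release?"},  # SimpleQueryRequest body
)
data = resp.json()  # QueryValidationResponse: {"reasoning": "...", "answerable": true/false}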


@basic_router.get("/user-searches")
def get_user_search_sessions(
user: User | None = Depends(current_user),
@@ -247,21 +231,6 @@ def get_search_session(
return response


# NOTE No longer used, after search/chat redesign.
# No search responses are answered with a conversational generative AI response
@basic_router.post("/stream-query-validation")
def stream_query_validation(
simple_query: SimpleQueryRequest, _: User = Depends(current_user)
) -> StreamingResponse:
# Note if weak model prompt is chosen, this check does not occur and will simply return that
# the query is valid, this is because weaker models cannot really handle this task well.
# Additionally, some weak model servers cannot handle concurrent inferences.
logger.notice(f"Validating query: {simple_query.query}")
return StreamingResponse(
stream_query_answerability(simple_query.query), media_type="application/json"
)


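Similarly, a rough sketch of consuming the removed streaming variant, which yielded newline-delimited JSON via get_json_line; the URL details are again assumptions.

# Illustrative only: reading the removed /stream-query-validation endpoint line by line.
import json
import requests

with requests.post(
    "http://localhost:8080/query/stream-query-validation",  # assumed host, port, and prefix
    json={"query": "Is the 2023 security report indexed?"},
    stream=True,
) as resp:
    for line in resp.iter_lines():
        if line:
            print(json.loads(line))  # each non-empty line is one JSON object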
@basic_router.post("/stream-answer-with-quote")
def get_answer_with_quote(
query_request: DirectQARequest,
