Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove Deprecated Endpoints #3235

Merged
merged 1 commit into from
Nov 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions backend/danswer/configs/chat_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,13 @@
# ~3k input, half for docs, half for chat history + prompts
CHAT_TARGET_CHUNK_PERCENTAGE = 512 * 3 / 3072

# Selects a different LLM question-answering prompt format.
# Valid values: default, cot, weak. An unset or empty env var resolves to None.
QA_PROMPT_OVERRIDE = os.environ.get("QA_PROMPT_OVERRIDE") or None
# Recency multiplier: 1 / (1 + DOC_TIME_DECAY * doc-age-in-years), set to 0 to have no decay
# Capped in Vespa at 0.5
DOC_TIME_DECAY = float(
    # An unset or empty env var falls back to 0.5. With that default the multiplier
    # hits the 0.5 limit at 2 years: 1 / (1 + 0.5 * 2) = 0.5
    os.environ.get("DOC_TIME_DECAY") or 0.5  # Hits limit at 2 years by default
)
BASE_RECENCY_DECAY = 0.5
FAVOR_RECENT_DECAY_MULTIPLIER = 2.0
# This next one has no env var of its own: it is derived from QA_PROMPT_OVERRIDE
# and is True only when the "weak" prompt format is selected.
DISABLE_LLM_QUERY_ANSWERABILITY = QA_PROMPT_OVERRIDE == "weak"
# For the highest matching base size chunk, how many chunks above and below do we pull in by default
# Note this is not in any of the deployment configs yet
# Currently only applies to search flow not chat
Expand Down
38 changes: 1 addition & 37 deletions backend/danswer/llm/answering/prompts/quotes_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,47 +2,17 @@

from danswer.chat.models import LlmDoc
from danswer.configs.chat_configs import LANGUAGE_HINT
from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE
from danswer.context.search.models import InferenceChunk
from danswer.db.search_settings import get_multilingual_expansion
from danswer.llm.answering.models import PromptConfig
from danswer.llm.utils import message_to_prompt_and_imgs
from danswer.prompts.direct_qa_prompts import CONTEXT_BLOCK
from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK
from danswer.prompts.direct_qa_prompts import JSON_PROMPT
from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT
from danswer.prompts.prompt_utils import add_date_time_to_prompt
from danswer.prompts.prompt_utils import build_complete_context_str


def _build_weak_llm_quotes_prompt(
    question: str,
    context_docs: list[LlmDoc] | list[InferenceChunk],
    history_str: str,
    prompt: PromptConfig,
) -> HumanMessage:
    """Build the reduced-demand prompt offered for weaker LLMs.

    Danswer supports a variety of LLMs, so this less demanding prompt exists as an
    option for weaker ones: small variants, low float precision, quantized, or
    distilled models. Only the first context document is used, and the output
    format requirements are very loose.

    Args:
        question: The user query placed into the prompt.
        context_docs: Candidate context; only the first entry's ``content`` is used.
        history_str: Accepted for signature parity; not used by this builder.
        prompt: Supplies the system prompt, the task prompt and the
            ``datetime_aware`` flag.

    Returns:
        A ``HumanMessage`` whose content is the fully formatted prompt string.
    """
    # Only the first document is included; with no documents the block stays empty.
    context_block = (
        CONTEXT_BLOCK.format(context_docs_str=context_docs[0].content)
        if context_docs
        else ""
    )

    template_fields = {
        "system_prompt": prompt.system_prompt,
        "context_block": context_block,
        "task_prompt": prompt.task_prompt,
        "user_query": question,
    }
    rendered = WEAK_LLM_PROMPT.format(**template_fields)

    if prompt.datetime_aware:
        rendered = add_date_time_to_prompt(prompt_str=rendered)

    return HumanMessage(content=rendered)


def _build_strong_llm_quotes_prompt(
question: str,
context_docs: list[LlmDoc] | list[InferenceChunk],
Expand Down Expand Up @@ -81,15 +51,9 @@ def build_quotes_user_message(
history_str: str,
prompt: PromptConfig,
) -> HumanMessage:
prompt_builder = (
_build_weak_llm_quotes_prompt
if QA_PROMPT_OVERRIDE == "weak"
else _build_strong_llm_quotes_prompt
)

query, _ = message_to_prompt_and_imgs(message)

return prompt_builder(
return _build_strong_llm_quotes_prompt(
question=query,
context_docs=context_docs,
history_str=history_str,
Expand Down
4 changes: 0 additions & 4 deletions backend/danswer/one_shot_answer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@ class PromptConfig(BaseModel):
datetime_aware: bool = True


# Minimal model carrying only an integer id.
# NOTE(review): presumably the id of a document set, going by the class name - confirm.
class DocumentSetConfig(BaseModel):
    id: int


class ToolConfig(BaseModel):
id: int

Expand Down
12 changes: 0 additions & 12 deletions backend/danswer/prompts/direct_qa_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,18 +118,6 @@
"""


# Prompt template for weak LLMs, which only take one chunk and cannot output JSON.
# Quotes are also not required, as asking for them tends to not work.
# The double-braced fields survive the f-string as literal {placeholders} to be filled
# in later with str.format(); only QUESTION_PAT is interpolated at definition time.
# The trailing .strip() drops the leading/trailing newlines of the triple-quoted literal.
WEAK_LLM_PROMPT = f"""
{{system_prompt}}
{{context_block}}
{{task_prompt}}

{QUESTION_PAT.upper()}
{{user_query}}
""".strip()


# This is only for visualization for the users to specify their own prompts
# The actual flow does not work like this
PARAMATERIZED_PROMPT = f"""
Expand Down
6 changes: 3 additions & 3 deletions backend/danswer/secondary_llm_flows/query_validation.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# NOTE No longer used. This needs to be revisited later.
import re
from collections.abc import Iterator

from danswer.chat.models import DanswerAnswerPiece
from danswer.chat.models import StreamingError
from danswer.configs.chat_configs import DISABLE_LLM_QUERY_ANSWERABILITY
from danswer.llm.exceptions import GenAIDisabledException
from danswer.llm.factory import get_default_llms
from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
Expand Down Expand Up @@ -46,7 +46,7 @@ def extract_answerability_bool(model_raw: str) -> bool:


def get_query_answerability(
user_query: str, skip_check: bool = DISABLE_LLM_QUERY_ANSWERABILITY
user_query: str, skip_check: bool = False
) -> tuple[str, bool]:
if skip_check:
return "Query Answerability Evaluation feature is turned off", True
Expand All @@ -67,7 +67,7 @@ def get_query_answerability(


def stream_query_answerability(
user_query: str, skip_check: bool = DISABLE_LLM_QUERY_ANSWERABILITY
user_query: str, skip_check: bool = False
) -> Iterator[str]:
if skip_check:
yield get_json_line(
Expand Down
5 changes: 1 addition & 4 deletions backend/danswer/server/query_and_chat/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,6 @@ class TagResponse(BaseModel):
tags: list[SourceTag]


# Request model carrying a single query string.
class SimpleQueryRequest(BaseModel):
    query: str


class UpdateChatSessionThreadRequest(BaseModel):
# If not specified, use Danswer default persona
chat_session_id: UUID
Expand Down Expand Up @@ -217,6 +213,7 @@ class ChatSessionDetailResponse(BaseModel):
current_alternate_model: str | None


# This one is not used anymore
class QueryValidationResponse(BaseModel):
reasoning: str
answerable: bool
Expand Down
31 changes: 0 additions & 31 deletions backend/danswer/server/query_and_chat/query_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,11 @@
from danswer.document_index.vespa.index import VespaIndex
from danswer.one_shot_answer.answer_question import stream_search_answer
from danswer.one_shot_answer.models import DirectQARequest
from danswer.secondary_llm_flows.query_validation import get_query_answerability
from danswer.secondary_llm_flows.query_validation import stream_query_answerability
from danswer.server.query_and_chat.models import AdminSearchRequest
from danswer.server.query_and_chat.models import AdminSearchResponse
from danswer.server.query_and_chat.models import ChatSessionDetails
from danswer.server.query_and_chat.models import ChatSessionsResponse
from danswer.server.query_and_chat.models import QueryValidationResponse
from danswer.server.query_and_chat.models import SearchSessionDetailResponse
from danswer.server.query_and_chat.models import SimpleQueryRequest
from danswer.server.query_and_chat.models import SourceTag
from danswer.server.query_and_chat.models import TagResponse
from danswer.server.query_and_chat.token_limit import check_token_rate_limits
Expand Down Expand Up @@ -135,18 +131,6 @@ def get_tags(
return TagResponse(tags=server_tags)


@basic_router.post("/query-validation")
def query_validation(
    simple_query: SimpleQueryRequest, _: User = Depends(current_user)
) -> QueryValidationResponse:
    # When the weak model prompt is chosen, the answerability check does not run and
    # the query is simply reported as valid. Weaker models cannot really handle this
    # task well, and some weak model servers cannot handle concurrent inferences.
    user_query = simple_query.query
    logger.notice(f"Validating query: {user_query}")

    reasoning_text, is_answerable = get_query_answerability(user_query)
    return QueryValidationResponse(
        reasoning=reasoning_text,
        answerable=is_answerable,
    )


@basic_router.get("/user-searches")
def get_user_search_sessions(
user: User | None = Depends(current_user),
Expand Down Expand Up @@ -247,21 +231,6 @@ def get_search_session(
return response


# NOTE: No longer used after the search/chat redesign.
# No search responses are answered with a conversational generative AI response.
@basic_router.post("/stream-query-validation")
def stream_query_validation(
    simple_query: SimpleQueryRequest, _: User = Depends(current_user)
) -> StreamingResponse:
    # When the weak model prompt is chosen, the answerability check does not run and
    # the query is simply reported as valid. Weaker models cannot really handle this
    # task well, and some weak model servers cannot handle concurrent inferences.
    user_query = simple_query.query
    logger.notice(f"Validating query: {user_query}")

    answerability_stream = stream_query_answerability(user_query)
    return StreamingResponse(answerability_stream, media_type="application/json")


@basic_router.post("/stream-answer-with-quote")
def get_answer_with_quote(
query_request: DirectQARequest,
Expand Down
Loading