Skip to content

Commit

Permalink
Refactor Search (onyx-dot-app#3233)
Browse files Browse the repository at this point in the history
  • Loading branch information
yuhongsun96 authored and ahmadassaf committed Nov 25, 2024
1 parent 57e3cf4 commit b576556
Show file tree
Hide file tree
Showing 64 changed files with 185 additions and 177 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
import sqlalchemy as sa

from danswer.db.models import IndexModelStatus
from danswer.search.enums import RecencyBiasSetting
from danswer.search.enums import SearchType
from danswer.context.search.enums import RecencyBiasSetting
from danswer.context.search.enums import SearchType

# revision identifiers, used by Alembic.
revision = "776b3bbe9092"
Expand Down
2 changes: 1 addition & 1 deletion backend/danswer/chat/chat_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@

from danswer.chat.models import CitationInfo
from danswer.chat.models import LlmDoc
from danswer.context.search.models import InferenceSection
from danswer.db.chat import get_chat_messages_by_session
from danswer.db.models import ChatMessage
from danswer.llm.answering.models import PreviousMessage
from danswer.search.models import InferenceSection
from danswer.utils.logger import setup_logger

logger = setup_logger()
Expand Down
8 changes: 4 additions & 4 deletions backend/danswer/chat/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
from pydantic import BaseModel

from danswer.configs.constants import DocumentSource
from danswer.search.enums import QueryFlow
from danswer.search.enums import SearchType
from danswer.search.models import RetrievalDocs
from danswer.search.models import SearchResponse
from danswer.context.search.enums import QueryFlow
from danswer.context.search.enums import SearchType
from danswer.context.search.models import RetrievalDocs
from danswer.context.search.models import SearchResponse
from danswer.tools.tool_implementations.custom.base_tool_types import ToolResultType


Expand Down
20 changes: 10 additions & 10 deletions backend/danswer/chat/process_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@
from danswer.configs.chat_configs import DISABLE_LLM_CHOOSE_SEARCH
from danswer.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
from danswer.configs.constants import MessageType
from danswer.context.search.enums import OptionalSearchSetting
from danswer.context.search.enums import QueryFlow
from danswer.context.search.enums import SearchType
from danswer.context.search.models import InferenceSection
from danswer.context.search.models import RetrievalDetails
from danswer.context.search.retrieval.search_runner import inference_sections_from_ids
from danswer.context.search.utils import chunks_or_sections_to_search_docs
from danswer.context.search.utils import dedupe_documents
from danswer.context.search.utils import drop_llm_indices
from danswer.context.search.utils import relevant_sections_to_indices
from danswer.db.chat import attach_files_to_chat_message
from danswer.db.chat import create_db_search_doc
from danswer.db.chat import create_new_chat_message
Expand Down Expand Up @@ -56,16 +66,6 @@
from danswer.llm.factory import get_main_llm_from_tuple
from danswer.llm.utils import litellm_exception_to_error_msg
from danswer.natural_language_processing.utils import get_tokenizer
from danswer.search.enums import OptionalSearchSetting
from danswer.search.enums import QueryFlow
from danswer.search.enums import SearchType
from danswer.search.models import InferenceSection
from danswer.search.models import RetrievalDetails
from danswer.search.retrieval.search_runner import inference_sections_from_ids
from danswer.search.utils import chunks_or_sections_to_search_docs
from danswer.search.utils import dedupe_documents
from danswer.search.utils import drop_llm_indices
from danswer.search.utils import relevant_sections_to_indices
from danswer.server.query_and_chat.models import ChatMessageDetail
from danswer.server.query_and_chat.models import CreateChatMessageRequest
from danswer.server.utils import get_json_line
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@

from danswer.configs.chat_configs import NUM_RETURNED_HITS
from danswer.configs.constants import DocumentSource
from danswer.context.search.enums import LLMEvaluationType
from danswer.context.search.enums import OptionalSearchSetting
from danswer.context.search.enums import SearchType
from danswer.db.models import Persona
from danswer.db.models import SearchSettings
from danswer.indexing.models import BaseChunk
from danswer.indexing.models import IndexingSetting
from danswer.search.enums import LLMEvaluationType
from danswer.search.enums import OptionalSearchSetting
from danswer.search.enums import SearchType
from shared_configs.enums import RerankerProvider


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,22 @@

from danswer.chat.models import SectionRelevancePiece
from danswer.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE
from danswer.context.search.enums import LLMEvaluationType
from danswer.context.search.enums import QueryFlow
from danswer.context.search.enums import SearchType
from danswer.context.search.models import IndexFilters
from danswer.context.search.models import InferenceChunk
from danswer.context.search.models import InferenceSection
from danswer.context.search.models import RerankMetricsContainer
from danswer.context.search.models import RetrievalMetricsContainer
from danswer.context.search.models import SearchQuery
from danswer.context.search.models import SearchRequest
from danswer.context.search.postprocessing.postprocessing import cleanup_chunks
from danswer.context.search.postprocessing.postprocessing import search_postprocessing
from danswer.context.search.preprocessing.preprocessing import retrieval_preprocessing
from danswer.context.search.retrieval.search_runner import retrieve_chunks
from danswer.context.search.utils import inference_section_from_chunks
from danswer.context.search.utils import relevant_sections_to_indices
from danswer.db.models import User
from danswer.db.search_settings import get_current_search_settings
from danswer.document_index.factory import get_default_document_index
Expand All @@ -16,22 +32,6 @@
from danswer.llm.answering.prune_and_merge import ChunkRange
from danswer.llm.answering.prune_and_merge import merge_chunk_intervals
from danswer.llm.interfaces import LLM
from danswer.search.enums import LLMEvaluationType
from danswer.search.enums import QueryFlow
from danswer.search.enums import SearchType
from danswer.search.models import IndexFilters
from danswer.search.models import InferenceChunk
from danswer.search.models import InferenceSection
from danswer.search.models import RerankMetricsContainer
from danswer.search.models import RetrievalMetricsContainer
from danswer.search.models import SearchQuery
from danswer.search.models import SearchRequest
from danswer.search.postprocessing.postprocessing import cleanup_chunks
from danswer.search.postprocessing.postprocessing import search_postprocessing
from danswer.search.preprocessing.preprocessing import retrieval_preprocessing
from danswer.search.retrieval.search_runner import retrieve_chunks
from danswer.search.utils import inference_section_from_chunks
from danswer.search.utils import relevant_sections_to_indices
from danswer.secondary_llm_flows.agentic_evaluation import evaluate_inference_section
from danswer.utils.logger import setup_logger
from danswer.utils.threadpool_concurrency import FunctionCall
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,19 @@
from danswer.configs.constants import RETURN_SEPARATOR
from danswer.configs.model_configs import CROSS_ENCODER_RANGE_MAX
from danswer.configs.model_configs import CROSS_ENCODER_RANGE_MIN
from danswer.context.search.enums import LLMEvaluationType
from danswer.context.search.models import ChunkMetric
from danswer.context.search.models import InferenceChunk
from danswer.context.search.models import InferenceChunkUncleaned
from danswer.context.search.models import InferenceSection
from danswer.context.search.models import MAX_METRICS_CONTENT
from danswer.context.search.models import RerankMetricsContainer
from danswer.context.search.models import SearchQuery
from danswer.document_index.document_index_utils import (
translate_boost_count_to_multiplier,
)
from danswer.llm.interfaces import LLM
from danswer.natural_language_processing.search_nlp_models import RerankingModel
from danswer.search.enums import LLMEvaluationType
from danswer.search.models import ChunkMetric
from danswer.search.models import InferenceChunk
from danswer.search.models import InferenceChunkUncleaned
from danswer.search.models import InferenceSection
from danswer.search.models import MAX_METRICS_CONTENT
from danswer.search.models import RerankMetricsContainer
from danswer.search.models import SearchQuery
from danswer.secondary_llm_flows.chunk_usefulness import llm_batch_eval_sections
from danswer.utils.logger import setup_logger
from danswer.utils.threadpool_concurrency import FunctionCall
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from sqlalchemy.orm import Session

from danswer.access.access import get_acl_for_user
from danswer.context.search.models import IndexFilters
from danswer.db.models import User
from danswer.search.models import IndexFilters


def build_access_filters_for_user(user: User | None, session: Session) -> list[str]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,25 @@
from danswer.configs.chat_configs import HYBRID_ALPHA_KEYWORD
from danswer.configs.chat_configs import NUM_POSTPROCESSED_RESULTS
from danswer.configs.chat_configs import NUM_RETURNED_HITS
from danswer.context.search.enums import LLMEvaluationType
from danswer.context.search.enums import RecencyBiasSetting
from danswer.context.search.enums import SearchType
from danswer.context.search.models import BaseFilters
from danswer.context.search.models import IndexFilters
from danswer.context.search.models import RerankingDetails
from danswer.context.search.models import SearchQuery
from danswer.context.search.models import SearchRequest
from danswer.context.search.preprocessing.access_filters import (
build_access_filters_for_user,
)
from danswer.context.search.retrieval.search_runner import (
remove_stop_words_and_punctuation,
)
from danswer.db.engine import CURRENT_TENANT_ID_CONTEXTVAR
from danswer.db.models import User
from danswer.db.search_settings import get_current_search_settings
from danswer.llm.interfaces import LLM
from danswer.natural_language_processing.search_nlp_models import QueryAnalysisModel
from danswer.search.enums import LLMEvaluationType
from danswer.search.enums import RecencyBiasSetting
from danswer.search.enums import SearchType
from danswer.search.models import BaseFilters
from danswer.search.models import IndexFilters
from danswer.search.models import RerankingDetails
from danswer.search.models import SearchQuery
from danswer.search.models import SearchRequest
from danswer.search.preprocessing.access_filters import build_access_filters_for_user
from danswer.search.retrieval.search_runner import remove_stop_words_and_punctuation
from danswer.secondary_llm_flows.source_filter import extract_source_filter
from danswer.secondary_llm_flows.time_filter import extract_time_filter
from danswer.utils.logger import setup_logger
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@
from nltk.tokenize import word_tokenize # type:ignore
from sqlalchemy.orm import Session

from danswer.context.search.models import ChunkMetric
from danswer.context.search.models import IndexFilters
from danswer.context.search.models import InferenceChunk
from danswer.context.search.models import InferenceChunkUncleaned
from danswer.context.search.models import InferenceSection
from danswer.context.search.models import MAX_METRICS_CONTENT
from danswer.context.search.models import RetrievalMetricsContainer
from danswer.context.search.models import SearchQuery
from danswer.context.search.postprocessing.postprocessing import cleanup_chunks
from danswer.context.search.utils import inference_section_from_chunks
from danswer.db.search_settings import get_current_search_settings
from danswer.db.search_settings import get_multilingual_expansion
from danswer.document_index.interfaces import DocumentIndex
Expand All @@ -14,16 +24,6 @@
replace_invalid_doc_id_characters,
)
from danswer.natural_language_processing.search_nlp_models import EmbeddingModel
from danswer.search.models import ChunkMetric
from danswer.search.models import IndexFilters
from danswer.search.models import InferenceChunk
from danswer.search.models import InferenceChunkUncleaned
from danswer.search.models import InferenceSection
from danswer.search.models import MAX_METRICS_CONTENT
from danswer.search.models import RetrievalMetricsContainer
from danswer.search.models import SearchQuery
from danswer.search.postprocessing.postprocessing import cleanup_chunks
from danswer.search.utils import inference_section_from_chunks
from danswer.secondary_llm_flows.query_expansion import multilingual_query_expansion
from danswer.utils.logger import setup_logger
from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from typing import cast

from danswer.configs.constants import KV_SEARCH_SETTINGS
from danswer.context.search.models import SavedSearchSettings
from danswer.key_value_store.factory import get_kv_store
from danswer.key_value_store.interface import KvKeyNotFoundError
from danswer.search.models import SavedSearchSettings
from danswer.utils.logger import setup_logger

logger = setup_logger()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
from typing import TypeVar

from danswer.chat.models import SectionRelevancePiece
from danswer.context.search.models import InferenceChunk
from danswer.context.search.models import InferenceSection
from danswer.context.search.models import SavedSearchDoc
from danswer.context.search.models import SavedSearchDocWithContent
from danswer.context.search.models import SearchDoc
from danswer.db.models import SearchDoc as DBSearchDoc
from danswer.search.models import InferenceChunk
from danswer.search.models import InferenceSection
from danswer.search.models import SavedSearchDoc
from danswer.search.models import SavedSearchDocWithContent
from danswer.search.models import SearchDoc


T = TypeVar(
Expand Down
2 changes: 1 addition & 1 deletion backend/danswer/danswerbot/slack/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import SearchFeedbackType
from danswer.configs.danswerbot_configs import DANSWER_BOT_NUM_DOCS_TO_DISPLAY
from danswer.context.search.models import SavedSearchDoc
from danswer.danswerbot.slack.constants import DISLIKE_BLOCK_ACTION_ID
from danswer.danswerbot.slack.constants import FEEDBACK_DOC_BUTTON_BLOCK_ACTION_ID
from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_ACTION_ID
Expand All @@ -31,7 +32,6 @@
from danswer.danswerbot.slack.utils import build_feedback_id
from danswer.danswerbot.slack.utils import remove_slack_text_interactions
from danswer.danswerbot.slack.utils import translate_vespa_highlight_to_slack
from danswer.search.models import SavedSearchDoc
from danswer.utils.text_processing import decode_escapes
from danswer.utils.text_processing import replace_whitespaces_w_space

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
from danswer.configs.danswerbot_configs import DANSWER_FOLLOWUP_EMOJI
from danswer.configs.danswerbot_configs import DANSWER_REACT_EMOJI
from danswer.configs.danswerbot_configs import ENABLE_DANSWERBOT_REFLEXION
from danswer.context.search.enums import OptionalSearchSetting
from danswer.context.search.models import BaseFilters
from danswer.context.search.models import RerankingDetails
from danswer.context.search.models import RetrievalDetails
from danswer.danswerbot.slack.blocks import build_documents_blocks
from danswer.danswerbot.slack.blocks import build_follow_up_block
from danswer.danswerbot.slack.blocks import build_qa_response_blocks
Expand Down Expand Up @@ -48,10 +52,6 @@
from danswer.one_shot_answer.answer_question import get_search_answer
from danswer.one_shot_answer.models import DirectQARequest
from danswer.one_shot_answer.models import OneShotQAResponse
from danswer.search.enums import OptionalSearchSetting
from danswer.search.models import BaseFilters
from danswer.search.models import RerankingDetails
from danswer.search.models import RetrievalDetails
from danswer.utils.logger import DanswerLoggingAdapter


Expand Down
2 changes: 1 addition & 1 deletion backend/danswer/danswerbot/slack/listener.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from danswer.configs.danswerbot_configs import DANSWER_BOT_RESPOND_EVERY_CHANNEL
from danswer.configs.danswerbot_configs import NOTIFY_SLACKBOT_NO_ANSWER
from danswer.connectors.slack.utils import expert_info_from_slack_id
from danswer.context.search.retrieval.search_runner import download_nltk_data
from danswer.danswerbot.slack.config import get_slack_channel_config_for_bot_and_channel
from danswer.danswerbot.slack.config import MAX_TENANTS_PER_POD
from danswer.danswerbot.slack.config import TENANT_ACQUISITION_INTERVAL
Expand Down Expand Up @@ -75,7 +76,6 @@
from danswer.natural_language_processing.search_nlp_models import warm_up_bi_encoder
from danswer.one_shot_answer.models import ThreadMessage
from danswer.redis.redis_pool import get_redis_client
from danswer.search.retrieval.search_runner import download_nltk_data
from danswer.server.manage.models import SlackBotTokens
from danswer.utils.logger import setup_logger
from danswer.utils.variable_functionality import set_is_ee_based_on_env_variable
Expand Down
6 changes: 3 additions & 3 deletions backend/danswer/db/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
from danswer.chat.models import DocumentRelevance
from danswer.configs.chat_configs import HARD_DELETE_CHATS
from danswer.configs.constants import MessageType
from danswer.context.search.models import RetrievalDocs
from danswer.context.search.models import SavedSearchDoc
from danswer.context.search.models import SearchDoc as ServerSearchDoc
from danswer.db.models import ChatMessage
from danswer.db.models import ChatMessage__SearchDoc
from danswer.db.models import ChatSession
Expand All @@ -31,9 +34,6 @@
from danswer.file_store.models import FileDescriptor
from danswer.llm.override_models import LLMOverride
from danswer.llm.override_models import PromptOverride
from danswer.search.models import RetrievalDocs
from danswer.search.models import SavedSearchDoc
from danswer.search.models import SearchDoc as ServerSearchDoc
from danswer.server.query_and_chat.models import ChatMessageDetail
from danswer.tools.tool_runner import ToolCallFinalResult
from danswer.utils.logger import setup_logger
Expand Down
2 changes: 1 addition & 1 deletion backend/danswer/db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
from danswer.file_store.models import FileDescriptor
from danswer.llm.override_models import LLMOverride
from danswer.llm.override_models import PromptOverride
from danswer.search.enums import RecencyBiasSetting
from danswer.context.search.enums import RecencyBiasSetting
from danswer.utils.encryption import decrypt_bytes_to_string
from danswer.utils.encryption import encrypt_string_to_bytes
from danswer.utils.headers import HeaderItemDict
Expand Down
2 changes: 1 addition & 1 deletion backend/danswer/db/persona.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from danswer.configs.chat_configs import BING_API_KEY
from danswer.configs.chat_configs import CONTEXT_CHUNKS_ABOVE
from danswer.configs.chat_configs import CONTEXT_CHUNKS_BELOW
from danswer.context.search.enums import RecencyBiasSetting
from danswer.db.constants import SLACK_BOT_PERSONA_PREFIX
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.models import DocumentSet
Expand All @@ -33,7 +34,6 @@
from danswer.db.models import User
from danswer.db.models import User__UserGroup
from danswer.db.models import UserGroup
from danswer.search.enums import RecencyBiasSetting
from danswer.server.features.persona.models import CreatePersonaRequest
from danswer.server.features.persona.models import PersonaSnapshot
from danswer.utils.logger import setup_logger
Expand Down
2 changes: 1 addition & 1 deletion backend/danswer/db/search_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from danswer.configs.model_configs import OLD_DEFAULT_DOCUMENT_ENCODER_MODEL
from danswer.configs.model_configs import OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM
from danswer.configs.model_configs import OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS
from danswer.context.search.models import SavedSearchSettings
from danswer.db.engine import get_session_with_default_tenant
from danswer.db.llm import fetch_embedding_provider
from danswer.db.models import CloudEmbeddingProvider
Expand All @@ -21,7 +22,6 @@
from danswer.indexing.models import IndexingSetting
from danswer.natural_language_processing.search_nlp_models import clean_model_name
from danswer.natural_language_processing.search_nlp_models import warm_up_cross_encoder
from danswer.search.models import SavedSearchSettings
from danswer.server.manage.embedding.models import (
CloudEmbeddingProvider as ServerCloudEmbeddingProvider,
)
Expand Down
Loading

0 comments on commit b576556

Please sign in to comment.