diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py
index 1174c8d060f..eaa231e88b7 100644
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -401,6 +401,9 @@
     os.environ.get("CUSTOM_ANSWER_VALIDITY_CONDITIONS", "[]")
 )
 
+VESPA_REQUEST_TIMEOUT = int(os.environ.get("VESPA_REQUEST_TIMEOUT") or "5")
+
+SYSTEM_RECURSION_LIMIT = int(os.environ.get("SYSTEM_RECURSION_LIMIT") or "1000")
 
 #####
 # Enterprise Edition Configs
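A quick sketch of what `VESPA_REQUEST_TIMEOUT` buys once it is threaded into the `httpx` clients in the next file. This is illustration only, not part of the diff: the health-check URL is a placeholder, and `http2=True` assumes the `h2` extra is installed. `httpx` applies a bare number to the connect, read, write, and pool phases and raises `httpx.TimeoutException` rather than letting a request to a wedged Vespa node block forever.

```python
# Illustration only, not part of the diff. URL is a placeholder.
import os

import httpx

VESPA_REQUEST_TIMEOUT = int(os.environ.get("VESPA_REQUEST_TIMEOUT") or "5")

with httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client:
    try:
        resp = http_client.get("http://localhost:8081/state/v1/health")  # placeholder
        resp.raise_for_status()
    except httpx.TimeoutException:
        # Raised once any phase (connect/read/write/pool) exceeds the limit,
        # so a stuck request fails fast instead of pinning an indexing thread.
        pass
```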
diff --git a/backend/danswer/document_index/vespa/index.py b/backend/danswer/document_index/vespa/index.py
index 25663e966a3..44a5918d756 100644
--- a/backend/danswer/document_index/vespa/index.py
+++ b/backend/danswer/document_index/vespa/index.py
@@ -15,6 +15,7 @@
 import requests
 
 from danswer.configs.app_configs import DOCUMENT_INDEX_NAME
+from danswer.configs.app_configs import VESPA_REQUEST_TIMEOUT
 from danswer.configs.chat_configs import DOC_TIME_DECAY
 from danswer.configs.chat_configs import NUM_RETURNED_HITS
 from danswer.configs.chat_configs import TITLE_CONTENT_RATIO
@@ -211,7 +212,7 @@ def index(
         # indexing / updates / deletes since we have to make a large volume of requests.
         with (
             concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
-            httpx.Client(http2=True) as http_client,
+            httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client,
         ):
             # Check for existing documents, existing documents need to have all of their chunks deleted
             # prior to indexing as the document size (num chunks) may have shrunk
@@ -275,7 +276,7 @@ def _update_chunk(
         # indexing / updates / deletes since we have to make a large volume of requests.
         with (
             concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
-            httpx.Client(http2=True) as http_client,
+            httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client,
         ):
             for update_batch in batch_generator(updates, batch_size):
                 future_to_document_id = {
@@ -419,7 +420,7 @@ def update_single(self, doc_id: str, fields: VespaDocumentFields) -> int:
         if self.secondary_index_name:
             index_names.append(self.secondary_index_name)
 
-        with httpx.Client(http2=True) as http_client:
+        with httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client:
             for index_name in index_names:
                 params = httpx.QueryParams(
                     {
@@ -475,7 +476,7 @@ def delete(self, doc_ids: list[str]) -> None:
         # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for
         # indexing / updates / deletes since we have to make a large volume of requests.
-        with httpx.Client(http2=True) as http_client:
+        with httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client:
             index_names = [self.index_name]
             if self.secondary_index_name:
                 index_names.append(self.secondary_index_name)
@@ -503,7 +504,7 @@ def delete_single(self, doc_id: str) -> int:
         if self.secondary_index_name:
             index_names.append(self.secondary_index_name)
 
-        with httpx.Client(http2=True) as http_client:
+        with httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client:
             for index_name in index_names:
                 params = httpx.QueryParams(
                     {
diff --git a/backend/danswer/indexing/chunker.py b/backend/danswer/indexing/chunker.py
index a25cfc3d32b..9cb4b3e1954 100644
--- a/backend/danswer/indexing/chunker.py
+++ b/backend/danswer/indexing/chunker.py
@@ -27,6 +27,7 @@
 MAX_METADATA_PERCENTAGE = 0.25
 CHUNK_MIN_CONTENT = 256
 
+
 logger = setup_logger()
diff --git a/backend/danswer/main.py b/backend/danswer/main.py
index d3aa8b00efd..d7ac6b3c3ed 100644
--- a/backend/danswer/main.py
+++ b/backend/danswer/main.py
@@ -1,3 +1,4 @@
+import sys
 import traceback
 from collections.abc import AsyncGenerator
 from contextlib import asynccontextmanager
@@ -32,6 +33,7 @@
 from danswer.configs.app_configs import OAUTH_CLIENT_SECRET
 from danswer.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW
 from danswer.configs.app_configs import POSTGRES_API_SERVER_POOL_SIZE
+from danswer.configs.app_configs import SYSTEM_RECURSION_LIMIT
 from danswer.configs.app_configs import USER_AUTH_SECRET
 from danswer.configs.app_configs import WEB_DOMAIN
 from danswer.configs.constants import AuthType
@@ -140,6 +142,11 @@ def include_router_with_global_prefix_prepended(
 
 @asynccontextmanager
 async def lifespan(app: FastAPI) -> AsyncGenerator:
+    # Set the recursion limit; SYSTEM_RECURSION_LIMIT always resolves to an
+    # int (default 1000), so it can be applied unconditionally
+    sys.setrecursionlimit(SYSTEM_RECURSION_LIMIT)
+    logger.notice(f"System recursion limit set to {SYSTEM_RECURSION_LIMIT}")
+
     SqlEngine.set_app_name(POSTGRES_WEB_APP_NAME)
     SqlEngine.init_engine(
         pool_size=POSTGRES_API_SERVER_POOL_SIZE,
diff --git a/backend/model_server/encoders.py b/backend/model_server/encoders.py
index 860151b3dc4..e2e167520ba 100644
--- a/backend/model_server/encoders.py
+++ b/backend/model_server/encoders.py
@@ -25,6 +25,7 @@
 from model_server.constants import EmbeddingProvider
 from model_server.utils import simple_log_function_time
 from shared_configs.configs import INDEXING_ONLY
+from shared_configs.configs import OPENAI_EMBEDDING_TIMEOUT
 from shared_configs.enums import EmbedTextType
 from shared_configs.enums import RerankerProvider
 from shared_configs.model_server_models import Embedding
@@ -56,7 +57,7 @@ def _initialize_client(
     api_key: str, provider: EmbeddingProvider, model: str | None = None
 ) -> Any:
     if provider == EmbeddingProvider.OPENAI:
-        return openai.OpenAI(api_key=api_key)
+        return openai.OpenAI(api_key=api_key, timeout=OPENAI_EMBEDDING_TIMEOUT)
     elif provider == EmbeddingProvider.COHERE:
         return CohereClient(api_key=api_key)
     elif provider == EmbeddingProvider.VOYAGE:
diff --git a/backend/shared_configs/configs.py b/backend/shared_configs/configs.py
index e8b599b7795..50233ab6878 100644
--- a/backend/shared_configs/configs.py
+++ b/backend/shared_configs/configs.py
@@ -60,6 +60,9 @@
 # notset, debug, info, notice, warning, error, or critical
 LOG_LEVEL = os.environ.get("LOG_LEVEL", "notice")
 
+# Timeout (in seconds) for embedding API calls; only used for OpenAI
+OPENAI_EMBEDDING_TIMEOUT = int(os.environ.get("OPENAI_EMBEDDING_TIMEOUT") or "600")
+
 # Fields which should only be set on new search setting
 PRESERVED_SEARCH_FIELDS = [
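On the embedding side, a minimal sketch of what the client-level timeout does; this is not part of the diff, the API key and model name are placeholders, and it assumes the backend package is on the path. In openai-python 1.x the client accepts `timeout` in seconds (the SDK's own default is also 10 minutes, so the config mainly makes this tunable), and a request that outlives it surfaces as `openai.APITimeoutError` after the SDK's automatic retries.

```python
# Illustration only, not part of the diff. Key and model are placeholders.
import openai

from shared_configs.configs import OPENAI_EMBEDDING_TIMEOUT  # added above, default 600

client = openai.OpenAI(api_key="sk-placeholder", timeout=OPENAI_EMBEDDING_TIMEOUT)
try:
    client.embeddings.create(model="text-embedding-3-small", input=["some chunk text"])
except openai.APITimeoutError:
    # Raised once the configured timeout elapses (after built-in retries),
    # giving the model server a bounded, configurable failure mode.
    pass
```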
diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml
index 86d988e7d90..4d0eff8612d 100644
--- a/deployment/docker_compose/docker-compose.dev.yml
+++ b/deployment/docker_compose/docker-compose.dev.yml
@@ -281,6 +281,7 @@ services:
       - INDEXING_ONLY=True
       # Set to debug to get more fine-grained logs
       - LOG_LEVEL=${LOG_LEVEL:-info}
+      - OPENAI_EMBEDDING_TIMEOUT=${OPENAI_EMBEDDING_TIMEOUT:-}
     volumes:
       # Not necessary, this is just to reduce download time during startup
       - indexing_huggingface_model_cache:/root/.cache/huggingface/
diff --git a/deployment/docker_compose/docker-compose.gpu-dev.yml b/deployment/docker_compose/docker-compose.gpu-dev.yml
index ebce01eadb2..6397f657c19 100644
--- a/deployment/docker_compose/docker-compose.gpu-dev.yml
+++ b/deployment/docker_compose/docker-compose.gpu-dev.yml
@@ -70,6 +70,9 @@ services:
      - DISABLE_RERANK_FOR_STREAMING=${DISABLE_RERANK_FOR_STREAMING:-}
       - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
       - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
+      - VESPA_REQUEST_TIMEOUT=${VESPA_REQUEST_TIMEOUT:-}
+      # We do not recommend changing this value
+      - SYSTEM_RECURSION_LIMIT=${SYSTEM_RECURSION_LIMIT:-}
       # Leave this on pretty please? Nothing sensitive is collected!
       # https://docs.danswer.dev/more/telemetry
       - DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-}
@@ -252,6 +255,7 @@ services:
       - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
       # Set to debug to get more fine-grained logs
       - LOG_LEVEL=${LOG_LEVEL:-info}
+      - OPENAI_EMBEDDING_TIMEOUT=${OPENAI_EMBEDDING_TIMEOUT:-}
     volumes:
       # Not necessary, this is just to reduce download time during startup
       - model_cache_huggingface:/root/.cache/huggingface/
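One subtlety in the compose wiring: `${VESPA_REQUEST_TIMEOUT:-}` sets the variable to an *empty string* inside the container whenever the host leaves it unset, which is why the Python configs above read `int(os.environ.get(...) or "default")` instead of passing the default as a second argument to `get`. A small sketch of the difference:

```python
# Illustration only: why the configs use `or "default"` rather than a
# default argument to os.environ.get().
import os

# This is what `- VESPA_REQUEST_TIMEOUT=${VESPA_REQUEST_TIMEOUT:-}` produces
# in the container when the variable is unset on the host:
os.environ["VESPA_REQUEST_TIMEOUT"] = ""

# The key exists, so a default second argument never kicks in, and int("")
# would raise ValueError at import time:
#     int(os.environ.get("VESPA_REQUEST_TIMEOUT", "5"))  # ValueError
# The `or` form falls back on any falsy value, including the empty string:
timeout = int(os.environ.get("VESPA_REQUEST_TIMEOUT") or "5")
assert timeout == 5
```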