From 5b06b53a3e2f1b2b05d70d8976dec0b1a7a4792e Mon Sep 17 00:00:00 2001 From: "Richard Kuo (Danswer)" Date: Tue, 5 Nov 2024 12:30:21 -0800 Subject: [PATCH 1/2] add sentry to spawned indexing task --- .../danswer/background/celery/tasks/indexing/tasks.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/backend/danswer/background/celery/tasks/indexing/tasks.py b/backend/danswer/background/celery/tasks/indexing/tasks.py index 7d2e6e0b3dd..6b0e0d329bc 100644 --- a/backend/danswer/background/celery/tasks/indexing/tasks.py +++ b/backend/danswer/background/celery/tasks/indexing/tasks.py @@ -4,6 +4,7 @@ from time import sleep import redis +import sentry_sdk from celery import Celery from celery import shared_task from celery import Task @@ -50,6 +51,7 @@ from shared_configs.configs import INDEXING_MODEL_SERVER_HOST from shared_configs.configs import INDEXING_MODEL_SERVER_PORT from shared_configs.configs import MULTI_TENANT +from shared_configs.configs import SENTRY_DSN logger = setup_logger() @@ -482,6 +484,15 @@ def connector_indexing_task( that the task transitioned to a "READY" state but the generator_complete_key doesn't exist. This will cause the primary worker to abort the indexing attempt and clean up. """ + if SENTRY_DSN: + sentry_sdk.init( + dsn=SENTRY_DSN, + traces_sample_rate=0.1, + ) + logger.info("Sentry initialized") + else: + logger.debug("Sentry DSN not provided, skipping Sentry initialization") + logger.info( f"Indexing spawned task starting: attempt={index_attempt_id} " f"tenant={tenant_id} " From 583cd14bf4161b37bf290889404742d5174d50fb Mon Sep 17 00:00:00 2001 From: "Richard Kuo (Danswer)" Date: Tue, 5 Nov 2024 16:46:50 -0800 Subject: [PATCH 2/2] comment why we need sentry here --- backend/danswer/background/celery/tasks/indexing/tasks.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/danswer/background/celery/tasks/indexing/tasks.py b/backend/danswer/background/celery/tasks/indexing/tasks.py index 188fb528e22..09259acf366 100644 --- a/backend/danswer/background/celery/tasks/indexing/tasks.py +++ b/backend/danswer/background/celery/tasks/indexing/tasks.py @@ -484,6 +484,9 @@ def connector_indexing_task( that the task transitioned to a "READY" state but the generator_complete_key doesn't exist. This will cause the primary worker to abort the indexing attempt and clean up. """ + + # Since connector_indexing_proxy_task spawns a new process using this function as + # the entrypoint, we init Sentry here. if SENTRY_DSN: sentry_sdk.init( dsn=SENTRY_DSN,