From 8ed46eff1e2e9abfe231db99d12d17fb82780718 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Sun, 20 Oct 2024 15:30:45 -0700 Subject: [PATCH 01/17] initial bones --- .../common_utils/managers/tenant.py | 54 +++++++++++++++++++ .../integration/common_utils/managers/user.py | 15 ++++-- .../tests/integration/common_utils/reset.py | 18 +++++-- backend/tests/integration/conftest.py | 5 ++ 4 files changed, 83 insertions(+), 9 deletions(-) create mode 100644 backend/tests/integration/common_utils/managers/tenant.py diff --git a/backend/tests/integration/common_utils/managers/tenant.py b/backend/tests/integration/common_utils/managers/tenant.py new file mode 100644 index 00000000000..dc3b0929301 --- /dev/null +++ b/backend/tests/integration/common_utils/managers/tenant.py @@ -0,0 +1,54 @@ +import requests + +from danswer.server.manage.models import AllUsersResponse +from danswer.server.models import FullUserSnapshot +from danswer.server.models import InvitedUserSnapshot +from tests.integration.common_utils.constants import API_SERVER_URL +from tests.integration.common_utils.constants import GENERAL_HEADERS +from tests.integration.common_utils.test_models import DATestUser + + +class TenantManager: + @staticmethod + def create( + tenant_id: str | None = None, + initial_admin_email: str | None = None, + ) -> DATestUser: + body = { + "tenant_id": tenant_id, + "owner_email": initial_admin_email, + } + + response = requests.post( + url=f"{API_SERVER_URL}/tenants/create", + json=body, + headers=GENERAL_HEADERS, + ) + + response.raise_for_status() + + return response.json() + + @staticmethod + def wai(user: DATestUser, user_to_perform_action: DATestUser | None = None) -> None: + if user_to_perform_action is None: + user_to_perform_action = user + response = requests.get( + url=f"{API_SERVER_URL}/manage/users", + headers=user_to_perform_action.headers + if user_to_perform_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + + data = response.json() + all_users = AllUsersResponse( + accepted=[FullUserSnapshot(**user) for user in data["accepted"]], + invited=[InvitedUserSnapshot(**user) for user in data["invited"]], + accepted_pages=data["accepted_pages"], + invited_pages=data["invited_pages"], + ) + for accepted_user in all_users.accepted: + if accepted_user.email == user.email and accepted_user.id == user.id: + return + raise ValueError(f"User {user.email} not found") diff --git a/backend/tests/integration/common_utils/managers/user.py b/backend/tests/integration/common_utils/managers/user.py index 4dcafc23cf9..34aeaf865c9 100644 --- a/backend/tests/integration/common_utils/managers/user.py +++ b/backend/tests/integration/common_utils/managers/user.py @@ -65,15 +65,22 @@ def login_as_user(test_user: DATestUser) -> DATestUser: data=data, headers=headers, ) + response.raise_for_status() - result_cookie = next(iter(response.cookies), None) - if not result_cookie: + cookies = response.cookies.get_dict() + session_cookie = cookies.get("session") + tenant_details_cookie = cookies.get("tenant_details") + + if not session_cookie: raise Exception("Failed to login") print(f"Logged in as {test_user.email}") - cookie = f"{result_cookie.name}={result_cookie.value}" - test_user.headers["Cookie"] = cookie + + # Set both cookies in the headers + test_user.headers["Cookie"] = ( + f"session={session_cookie}; " f"tenant_details={tenant_details_cookie}" + ) return test_user @staticmethod diff --git a/backend/tests/integration/common_utils/reset.py b/backend/tests/integration/common_utils/reset.py index a532406c4cd..6713ab7cbf5 100644 --- a/backend/tests/integration/common_utils/reset.py +++ b/backend/tests/integration/common_utils/reset.py @@ -1,5 +1,6 @@ import logging import time +from types import SimpleNamespace import psycopg2 import requests @@ -26,7 +27,10 @@ def _run_migrations( - database_url: str, direction: str = "upgrade", revision: str = "head" + database_url: str, + direction: str = "upgrade", + revision: str = "head", + schema: str = "public", ) -> None: # hide info logs emitted during migration logging.getLogger("alembic").setLevel(logging.CRITICAL) @@ -36,6 +40,9 @@ def _run_migrations( alembic_cfg.set_section_option("logger_alembic", "level", "WARN") alembic_cfg.attributes["configure_logger"] = False + alembic_cfg.cmd_opts = SimpleNamespace() # type: ignore + alembic_cfg.cmd_opts.x = [f"schema={schema}"] # type: ignore + # Set the SQLAlchemy URL in the Alembic configuration alembic_cfg.set_main_option("sqlalchemy.url", database_url) @@ -52,7 +59,7 @@ def _run_migrations( logging.getLogger("alembic").setLevel(logging.INFO) -def reset_postgres(database: str = "postgres") -> None: +def reset_postgres(database: str = "postgres", multitenant: bool = False) -> None: """Reset the Postgres database.""" # NOTE: need to delete all rows to allow migrations to be rolled back @@ -127,6 +134,7 @@ def reset_postgres(database: str = "postgres") -> None: def reset_vespa() -> None: """Wipe all data from the Vespa index.""" + with get_session_context_manager() as db_session: # swap to the correct default model check_index_swap(db_session) @@ -166,10 +174,10 @@ def reset_vespa() -> None: time.sleep(5) -def reset_all() -> None: +def reset_all(multitenant: bool = False) -> None: """Reset both Postgres and Vespa.""" logger.info("Resetting Postgres...") - reset_postgres() + reset_postgres(multitenant=multitenant) logger.info("Resetting Vespa...") - reset_vespa() + reset_vespa(multitenant=multitenant) logger.info("Finished resetting all.") diff --git a/backend/tests/integration/conftest.py b/backend/tests/integration/conftest.py index 77d9e0e7022..d86d0162ce4 100644 --- a/backend/tests/integration/conftest.py +++ b/backend/tests/integration/conftest.py @@ -44,3 +44,8 @@ def vespa_client(db_session: Session) -> vespa_fixture: @pytest.fixture def reset() -> None: reset_all() + + +@pytest.fixture +def reset_multitenant() -> None: + reset_all(multitenant=True) From ef6014c1723a1a054625142095d6870f5d84f44b Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Sun, 20 Oct 2024 17:30:04 -0700 Subject: [PATCH 02/17] kk --- .github/workflows/pr-Integration-tests.yml | 55 +++++++++++++++++++++- backend/tests/integration/Dockerfile | 3 +- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr-Integration-tests.yml b/.github/workflows/pr-Integration-tests.yml index 0e4856f7194..597494f4cfb 100644 --- a/.github/workflows/pr-Integration-tests.yml +++ b/.github/workflows/pr-Integration-tests.yml @@ -130,7 +130,7 @@ jobs: done echo "Finished waiting for service." - - name: Run integration tests + - name: Run Standard Integration Tests run: | echo "Running integration tests..." docker run --rm --network danswer-stack_default \ @@ -145,7 +145,8 @@ jobs: -e OPENAI_API_KEY=${OPENAI_API_KEY} \ -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \ -e TEST_WEB_HOSTNAME=test-runner \ - danswer/danswer-integration:test + danswer/danswer-integration:test \ + /app/tests/integration/tests continue-on-error: true id: run_tests @@ -158,6 +159,56 @@ jobs: echo "All integration tests passed successfully." fi + - name: Stop Docker containers + run: | + cd deployment/docker_compose + docker compose -f docker-compose.dev.yml -p danswer-stack down -v + + # Start containers for multi-tenant tests + - name: Start Docker containers for multi-tenant tests + run: | + cd deployment/docker_compose + ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \ + MULTI_TENANT=true \ + AUTH_TYPE=basic \ # Adjust as needed + REQUIRE_EMAIL_VERIFICATION=false \ + DISABLE_TELEMETRY=true \ + IMAGE_TAG=test \ + docker compose -f docker-compose.dev.yml -p danswer-stack up -d + id: start_docker_multi_tenant + + # In practice, `cloud` Auth type would require OAUTH credentials to be set. + - name: Run Multi-Tenant Integration Tests + run: | + echo "Running integration tests..." + docker run --rm --network danswer-stack_default \ + --name test-runner \ + -e POSTGRES_HOST=relational_db \ + -e POSTGRES_USER=postgres \ + -e POSTGRES_PASSWORD=password \ + -e POSTGRES_DB=postgres \ + -e VESPA_HOST=index \ + -e REDIS_HOST=cache \ + -e API_SERVER_HOST=api_server \ + -e OPENAI_API_KEY=${OPENAI_API_KEY} \ + -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \ + -e TEST_WEB_HOSTNAME=test-runner \ + -e AUTH_TYPE=cloud \ + -e MULTI_TENANT=true \ + danswer/danswer-integration:test \ + /app/tests/integration/multitenant_tests + continue-on-error: true + id: run_tests + + - name: Check multi-tenant test results + run: | + if [ ${{ steps.run_tests.outcome }} == 'failure' ]; then + echo "Integration tests failed. Exiting with error." + exit 1 + else + echo "All integration tests passed successfully." + fi + - name: Save Docker logs if: success() || failure() run: | diff --git a/backend/tests/integration/Dockerfile b/backend/tests/integration/Dockerfile index 02cdcad0b44..92a94a32d48 100644 --- a/backend/tests/integration/Dockerfile +++ b/backend/tests/integration/Dockerfile @@ -83,4 +83,5 @@ COPY ./tests/integration /app/tests/integration ENV PYTHONPATH=/app -CMD ["pytest", "-s", "/app/tests/integration"] +ENTRYPOINT ["pytest", "-s"] +CMD ["/app/tests/integration"] \ No newline at end of file From 57110508b58ea3879893d2e6995d9026f2ce9940 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Sun, 20 Oct 2024 18:16:49 -0700 Subject: [PATCH 03/17] k --- backend/danswer/configs/app_configs.py | 2 +- .../common_utils/managers/tenant.py | 48 ++++++-- .../tests/integration/common_utils/reset.py | 107 +++++++++++++++++- .../tenants/test_tenant_creation.py | 5 + .../docker_compose/docker-compose.dev.yml | 2 +- .../docker_compose/docker-compose.gpu-dev.yml | 2 +- .../docker-compose.search-testing.yml | 2 +- 7 files changed, 149 insertions(+), 19 deletions(-) create mode 100644 backend/tests/integration/multitenant_tests/tenants/test_tenant_creation.py diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py index caf7a103b94..505268b6edd 100644 --- a/backend/danswer/configs/app_configs.py +++ b/backend/danswer/configs/app_configs.py @@ -140,7 +140,7 @@ os.environ.get("POSTGRES_PASSWORD") or "password" ) POSTGRES_HOST = os.environ.get("POSTGRES_HOST") or "localhost" -POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432" +POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5433" POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres" POSTGRES_API_SERVER_POOL_SIZE = int( diff --git a/backend/tests/integration/common_utils/managers/tenant.py b/backend/tests/integration/common_utils/managers/tenant.py index dc3b0929301..76fd16471f8 100644 --- a/backend/tests/integration/common_utils/managers/tenant.py +++ b/backend/tests/integration/common_utils/managers/tenant.py @@ -1,3 +1,7 @@ +from datetime import datetime +from datetime import timedelta + +import jwt import requests from danswer.server.manage.models import AllUsersResponse @@ -8,21 +12,39 @@ from tests.integration.common_utils.test_models import DATestUser +def generate_auth_token() -> str: + payload = { + "iss": "control_plane", + "exp": datetime.utcnow() + timedelta(minutes=5), + "iat": datetime.utcnow(), + "scope": "tenant:create", + } + token = jwt.encode(payload, "", algorithm="HS256") + return token + + class TenantManager: @staticmethod def create( tenant_id: str | None = None, initial_admin_email: str | None = None, - ) -> DATestUser: + ) -> dict[str, str]: body = { "tenant_id": tenant_id, - "owner_email": initial_admin_email, + "initial_admin_email": initial_admin_email, + } + + token = generate_auth_token() + headers = { + "Authorization": f"Bearer {token}", + "X-API-KEY": "", + "Content-Type": "application/json", } response = requests.post( url=f"{API_SERVER_URL}/tenants/create", json=body, - headers=GENERAL_HEADERS, + headers=headers, ) response.raise_for_status() @@ -30,25 +52,31 @@ def create( return response.json() @staticmethod - def wai(user: DATestUser, user_to_perform_action: DATestUser | None = None) -> None: - if user_to_perform_action is None: - user_to_perform_action = user + def get_all_users( + user_performing_action: DATestUser | None = None, + ) -> AllUsersResponse: response = requests.get( url=f"{API_SERVER_URL}/manage/users", - headers=user_to_perform_action.headers - if user_to_perform_action + headers=user_performing_action.headers + if user_performing_action else GENERAL_HEADERS, ) response.raise_for_status() data = response.json() - all_users = AllUsersResponse( + return AllUsersResponse( accepted=[FullUserSnapshot(**user) for user in data["accepted"]], invited=[InvitedUserSnapshot(**user) for user in data["invited"]], accepted_pages=data["accepted_pages"], invited_pages=data["invited_pages"], ) + + @staticmethod + def verify_user_in_tenant( + user: DATestUser, user_performing_action: DATestUser | None = None + ) -> None: + all_users = TenantManager.get_all_users(user_performing_action) for accepted_user in all_users.accepted: if accepted_user.email == user.email and accepted_user.id == user.id: return - raise ValueError(f"User {user.email} not found") + raise ValueError(f"User {user.email} not found in tenant") diff --git a/backend/tests/integration/common_utils/reset.py b/backend/tests/integration/common_utils/reset.py index 6713ab7cbf5..28f577e0311 100644 --- a/backend/tests/integration/common_utils/reset.py +++ b/backend/tests/integration/common_utils/reset.py @@ -7,12 +7,14 @@ from alembic import command from alembic.config import Config +from danswer.background.celery.celery_utils import get_all_tenant_ids from danswer.configs.app_configs import POSTGRES_HOST from danswer.configs.app_configs import POSTGRES_PASSWORD from danswer.configs.app_configs import POSTGRES_PORT from danswer.configs.app_configs import POSTGRES_USER from danswer.db.engine import build_connection_string from danswer.db.engine import get_session_context_manager +from danswer.db.engine import get_session_with_tenant from danswer.db.engine import SYNC_DB_API from danswer.db.search_settings import get_current_search_settings from danswer.db.swap_index import check_index_swap @@ -28,6 +30,7 @@ def _run_migrations( database_url: str, + config_name: str, direction: str = "upgrade", revision: str = "head", schema: str = "public", @@ -39,6 +42,7 @@ def _run_migrations( alembic_cfg = Config("alembic.ini") alembic_cfg.set_section_option("logger_alembic", "level", "WARN") alembic_cfg.attributes["configure_logger"] = False + alembic_cfg.config_ini_section = config_name alembic_cfg.cmd_opts = SimpleNamespace() # type: ignore alembic_cfg.cmd_opts.x = [f"schema={schema}"] # type: ignore @@ -59,7 +63,9 @@ def _run_migrations( logging.getLogger("alembic").setLevel(logging.INFO) -def reset_postgres(database: str = "postgres", multitenant: bool = False) -> None: +def reset_postgres( + database: str = "postgres", config_name: str = "alembic", setup_danswer: bool = True +) -> None: """Reset the Postgres database.""" # NOTE: need to delete all rows to allow migrations to be rolled back @@ -118,14 +124,18 @@ def reset_postgres(database: str = "postgres", multitenant: bool = False) -> Non ) _run_migrations( conn_str, + config_name, direction="downgrade", revision="base", ) _run_migrations( conn_str, + config_name, direction="upgrade", revision="head", ) + if setup_danswer: + return # do the same thing as we do on API server startup with get_session_context_manager() as db_session: @@ -174,10 +184,97 @@ def reset_vespa() -> None: time.sleep(5) +def reset_postgres_multitenant() -> None: + """Reset the Postgres database for all tenants in a multitenant setup.""" + + conn = psycopg2.connect( + dbname="postgres", + user=POSTGRES_USER, + password=POSTGRES_PASSWORD, + host=POSTGRES_HOST, + port=POSTGRES_PORT, + ) + conn.autocommit = True + cur = conn.cursor() + + # Get all tenant schemas + cur.execute( + """ + SELECT schema_name + FROM information_schema.schemata + WHERE schema_name LIKE 'tenant_%' + """ + ) + tenant_schemas = cur.fetchall() + + # Drop all tenant schemas + for schema in tenant_schemas: + schema_name = schema[0] + cur.execute(f'DROP SCHEMA "{schema_name}" CASCADE') + + cur.close() + conn.close() + + reset_postgres(config_name="schema_private") + + +def reset_vespa_multitenant() -> None: + """Wipe all data from the Vespa index for all tenants.""" + + for tenant_id in get_all_tenant_ids(): + with get_session_with_tenant(tenant_id=tenant_id) as db_session: + # swap to the correct default model for each tenant + check_index_swap(db_session) + + search_settings = get_current_search_settings(db_session) + index_name = search_settings.index_name + + success = setup_vespa( + document_index=VespaIndex(index_name=index_name, secondary_index_name=None), + index_setting=IndexingSetting.from_db_model(search_settings), + secondary_index_setting=None, + ) + + if not success: + raise RuntimeError( + f"Could not connect to Vespa for tenant {tenant_id} within the specified timeout." + ) + + for _ in range(5): + try: + continuation = None + should_continue = True + while should_continue: + params = {"selection": "true", "cluster": "danswer_index"} + if continuation: + params = {**params, "continuation": continuation} + response = requests.delete( + DOCUMENT_ID_ENDPOINT.format(index_name=index_name), + params=params, + ) + response.raise_for_status() + + response_json = response.json() + + continuation = response_json.get("continuation") + should_continue = bool(continuation) + + break + except Exception as e: + print(f"Error deleting documents for tenant {tenant_id}: {e}") + time.sleep(5) + + def reset_all(multitenant: bool = False) -> None: """Reset both Postgres and Vespa.""" - logger.info("Resetting Postgres...") - reset_postgres(multitenant=multitenant) - logger.info("Resetting Vespa...") - reset_vespa(multitenant=multitenant) + if multitenant: + logger.info("Resetting Postgres for all tenants...") + reset_postgres_multitenant() + logger.info("Resetting Vespa for all tenants...") + reset_vespa_multitenant() + else: + logger.info("Resetting Postgres...") + reset_postgres() + logger.info("Resetting Vespa...") + reset_vespa() logger.info("Finished resetting all.") diff --git a/backend/tests/integration/multitenant_tests/tenants/test_tenant_creation.py b/backend/tests/integration/multitenant_tests/tenants/test_tenant_creation.py new file mode 100644 index 00000000000..4ad19f4da3d --- /dev/null +++ b/backend/tests/integration/multitenant_tests/tenants/test_tenant_creation.py @@ -0,0 +1,5 @@ +from tests.integration.common_utils.managers.tenant import TenantManager + + +def test_tenant_creation(reset_multitenant: None) -> None: + TenantManager.create("tenant_dev", "test@test.com") diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml index d22bde5b465..109892f9db9 100644 --- a/deployment/docker_compose/docker-compose.dev.yml +++ b/deployment/docker_compose/docker-compose.dev.yml @@ -313,7 +313,7 @@ services: - POSTGRES_USER=${POSTGRES_USER:-postgres} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} ports: - - "5432:5432" + - "5433:5432" volumes: - db_volume:/var/lib/postgresql/data diff --git a/deployment/docker_compose/docker-compose.gpu-dev.yml b/deployment/docker_compose/docker-compose.gpu-dev.yml index a7e0a2afe97..ea40c2fef0a 100644 --- a/deployment/docker_compose/docker-compose.gpu-dev.yml +++ b/deployment/docker_compose/docker-compose.gpu-dev.yml @@ -313,7 +313,7 @@ services: - POSTGRES_USER=${POSTGRES_USER:-postgres} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} ports: - - "5432:5432" + - "5433:5432" volumes: - db_volume:/var/lib/postgresql/data diff --git a/deployment/docker_compose/docker-compose.search-testing.yml b/deployment/docker_compose/docker-compose.search-testing.yml index fab950c064e..2afd54e029c 100644 --- a/deployment/docker_compose/docker-compose.search-testing.yml +++ b/deployment/docker_compose/docker-compose.search-testing.yml @@ -157,7 +157,7 @@ services: - POSTGRES_USER=${POSTGRES_USER:-postgres} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} ports: - - "5432" + - "5433" volumes: - db_volume:/var/lib/postgresql/data From 18e458c9c5ed8e5ecb53325b15a17da7723f0396 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Sun, 20 Oct 2024 22:54:19 -0700 Subject: [PATCH 04/17] k: --- backend/danswer/auth/users.py | 5 +- backend/danswer/configs/app_configs.py | 5 +- backend/danswer/indexing/indexing_pipeline.py | 1 + .../danswer/server/danswer_api/ingestion.py | 6 + .../server/middleware/tenant_tracking.py | 3 +- .../integration/common_utils/managers/chat.py | 2 +- .../common_utils/managers/credential.py | 1 + .../integration/common_utils/managers/user.py | 5 +- .../integration/multitenant_tests/cc_Pair | 0 .../syncing/test_search_permissions.py | 154 ++++++++++++++++++ .../tenants/test_tenant_creation.py | 36 ++++ .../docker_compose/docker-compose.dev.yml | 2 +- .../docker_compose/docker-compose.gpu-dev.yml | 2 +- .../docker-compose.search-testing.yml | 2 +- 14 files changed, 215 insertions(+), 9 deletions(-) create mode 100644 backend/tests/integration/multitenant_tests/cc_Pair create mode 100644 backend/tests/integration/multitenant_tests/syncing/test_search_permissions.py diff --git a/backend/danswer/auth/users.py b/backend/danswer/auth/users.py index 4565073b6af..31179714095 100644 --- a/backend/danswer/auth/users.py +++ b/backend/danswer/auth/users.py @@ -58,6 +58,7 @@ from danswer.auth.schemas import UserUpdate from danswer.configs.app_configs import AUTH_TYPE from danswer.configs.app_configs import DISABLE_AUTH +from danswer.configs.app_configs import DISABLE_VERIFICATION from danswer.configs.app_configs import EMAIL_FROM from danswer.configs.app_configs import MULTI_TENANT from danswer.configs.app_configs import REQUIRE_EMAIL_VERIFICATION @@ -132,7 +133,9 @@ def get_display_email(email: str | None, space_less: bool = False) -> str: def user_needs_to_be_verified() -> bool: # all other auth types besides basic should require users to be # verified - return AUTH_TYPE != AuthType.BASIC or REQUIRE_EMAIL_VERIFICATION + return not DISABLE_VERIFICATION and ( + AUTH_TYPE != AuthType.BASIC or REQUIRE_EMAIL_VERIFICATION + ) def verify_email_is_invited(email: str) -> None: diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py index 505268b6edd..5be2b68d2a4 100644 --- a/backend/danswer/configs/app_configs.py +++ b/backend/danswer/configs/app_configs.py @@ -43,6 +43,9 @@ AUTH_TYPE = AuthType((os.environ.get("AUTH_TYPE") or AuthType.DISABLED.value).lower()) DISABLE_AUTH = AUTH_TYPE == AuthType.DISABLED +# Necessary for cloud integration tests +DISABLE_VERIFICATION = os.environ.get("DISABLE_VERIFICATION", "").lower() == "true" + # Encryption key secret is used to encrypt connector credentials, api keys, and other sensitive # information. This provides an extra layer of security on top of Postgres access controls # and is available in Danswer EE @@ -140,7 +143,7 @@ os.environ.get("POSTGRES_PASSWORD") or "password" ) POSTGRES_HOST = os.environ.get("POSTGRES_HOST") or "localhost" -POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5433" +POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432" POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres" POSTGRES_API_SERVER_POOL_SIZE = int( diff --git a/backend/danswer/indexing/indexing_pipeline.py b/backend/danswer/indexing/indexing_pipeline.py index d40bd341fdf..22036d54314 100644 --- a/backend/danswer/indexing/indexing_pipeline.py +++ b/backend/danswer/indexing/indexing_pipeline.py @@ -379,6 +379,7 @@ def build_indexing_pipeline( attempt_id: int | None = None, tenant_id: str | None = None, ) -> IndexingPipelineProtocol: + print("INDEXING PIPELINE") """Builds a pipeline which takes in a list (batch) of docs and indexes them.""" search_settings = get_current_search_settings(db_session) multipass = ( diff --git a/backend/danswer/server/danswer_api/ingestion.py b/backend/danswer/server/danswer_api/ingestion.py index cea3ec86575..fb94981f19b 100644 --- a/backend/danswer/server/danswer_api/ingestion.py +++ b/backend/danswer/server/danswer_api/ingestion.py @@ -9,6 +9,7 @@ from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id from danswer.db.document import get_documents_by_cc_pair from danswer.db.document import get_ingestion_documents +from danswer.db.engine import get_current_tenant_id from danswer.db.engine import get_session from danswer.db.models import User from danswer.db.search_settings import get_current_search_settings @@ -67,6 +68,7 @@ def upsert_ingestion_doc( doc_info: IngestionDocument, _: User | None = Depends(api_key_dep), db_session: Session = Depends(get_session), + tenant_id=Depends(get_current_tenant_id), ) -> IngestionResult: doc_info.document.from_ingestion_api = True @@ -101,6 +103,7 @@ def upsert_ingestion_doc( document_index=curr_doc_index, ignore_time_skip=True, db_session=db_session, + tenant_id=tenant_id, ) new_doc, __chunk_count = indexing_pipeline( @@ -110,6 +113,8 @@ def upsert_ingestion_doc( credential_id=cc_pair.credential_id, ), ) + print("NEW DOC", new_doc) + print("CHUNK COUNT", __chunk_count) # If there's a secondary index being built, index the doc but don't use it for return here if sec_ind_name: @@ -134,6 +139,7 @@ def upsert_ingestion_doc( document_index=sec_doc_index, ignore_time_skip=True, db_session=db_session, + tenant_id=tenant_id, ) sec_ind_pipeline( diff --git a/backend/ee/danswer/server/middleware/tenant_tracking.py b/backend/ee/danswer/server/middleware/tenant_tracking.py index f564a4fc683..b43a75b23df 100644 --- a/backend/ee/danswer/server/middleware/tenant_tracking.py +++ b/backend/ee/danswer/server/middleware/tenant_tracking.py @@ -22,8 +22,9 @@ async def set_tenant_id( ) -> Response: try: logger.info(f"Request route: {request.url.path}") - + logger.info(f"Request cookies: {request.cookies}") if not MULTI_TENANT: + logger.info("SETITNG TO DEFAULt") tenant_id = POSTGRES_DEFAULT_SCHEMA else: token = request.cookies.get("tenant_details") diff --git a/backend/tests/integration/common_utils/managers/chat.py b/backend/tests/integration/common_utils/managers/chat.py index a8643e9e83d..a2edb32caec 100644 --- a/backend/tests/integration/common_utils/managers/chat.py +++ b/backend/tests/integration/common_utils/managers/chat.py @@ -23,7 +23,7 @@ class ChatSessionManager: @staticmethod def create( - persona_id: int = -1, + persona_id: int = 0, description: str = "Test chat session", user_performing_action: DATestUser | None = None, ) -> DATestChatSession: diff --git a/backend/tests/integration/common_utils/managers/credential.py b/backend/tests/integration/common_utils/managers/credential.py index 8f729e4b06c..8c8a59d4856 100644 --- a/backend/tests/integration/common_utils/managers/credential.py +++ b/backend/tests/integration/common_utils/managers/credential.py @@ -32,6 +32,7 @@ def create( "curator_public": curator_public, "groups": groups or [], } + response = requests.post( url=f"{API_SERVER_URL}/manage/credential", json=credential_request, diff --git a/backend/tests/integration/common_utils/managers/user.py b/backend/tests/integration/common_utils/managers/user.py index 34aeaf865c9..ecc6d3206b9 100644 --- a/backend/tests/integration/common_utils/managers/user.py +++ b/backend/tests/integration/common_utils/managers/user.py @@ -69,7 +69,7 @@ def login_as_user(test_user: DATestUser) -> DATestUser: response.raise_for_status() cookies = response.cookies.get_dict() - session_cookie = cookies.get("session") + session_cookie = cookies.get("fastapiusersauth") tenant_details_cookie = cookies.get("tenant_details") if not session_cookie: @@ -79,7 +79,8 @@ def login_as_user(test_user: DATestUser) -> DATestUser: # Set both cookies in the headers test_user.headers["Cookie"] = ( - f"session={session_cookie}; " f"tenant_details={tenant_details_cookie}" + f"fastapiusersauth={session_cookie}; " + f"tenant_details={tenant_details_cookie}" ) return test_user diff --git a/backend/tests/integration/multitenant_tests/cc_Pair b/backend/tests/integration/multitenant_tests/cc_Pair new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/tests/integration/multitenant_tests/syncing/test_search_permissions.py b/backend/tests/integration/multitenant_tests/syncing/test_search_permissions.py new file mode 100644 index 00000000000..5281f66376f --- /dev/null +++ b/backend/tests/integration/multitenant_tests/syncing/test_search_permissions.py @@ -0,0 +1,154 @@ +from danswer.db.models import UserRole +from tests.integration.common_utils.managers.api_key import APIKeyManager +from tests.integration.common_utils.managers.cc_pair import CCPairManager +from tests.integration.common_utils.managers.chat import ChatSessionManager +from tests.integration.common_utils.managers.document import DocumentManager +from tests.integration.common_utils.managers.llm_provider import LLMProviderManager +from tests.integration.common_utils.managers.tenant import TenantManager +from tests.integration.common_utils.managers.user import UserManager +from tests.integration.common_utils.test_models import DATestAPIKey +from tests.integration.common_utils.test_models import DATestCCPair +from tests.integration.common_utils.test_models import DATestChatSession +from tests.integration.common_utils.test_models import DATestUser + + +def test_multi_tenant_access_control(reset_multitenant: None) -> None: + # Create Tenant 1 and its Admin User + TenantManager.create("tenant_dev1", "test1@test.com") + test_user1: DATestUser = UserManager.create(name="test1", email="test1@test.com") + assert UserManager.verify_role(test_user1, UserRole.ADMIN) + + # Create Tenant 2 and its Admin User + TenantManager.create("tenant_dev2", "test2@test.com") + test_user2: DATestUser = UserManager.create(name="test2", email="test2@test.com") + assert UserManager.verify_role(test_user2, UserRole.ADMIN) + + # Create connectors for Tenant 1 + cc_pair_1: DATestCCPair = CCPairManager.create_from_scratch( + user_performing_action=test_user1, + ) + api_key_1: DATestAPIKey = APIKeyManager.create( + user_performing_action=test_user1, + ) + api_key_1.headers.update(test_user1.headers) + LLMProviderManager.create(user_performing_action=test_user1) + + # Seed documents for Tenant 1 + cc_pair_1.documents = [] + doc1_tenant1 = DocumentManager.seed_doc_with_content( + cc_pair=cc_pair_1, + content="Tenant 1 Document Content", + api_key=api_key_1, + ) + doc2_tenant1 = DocumentManager.seed_doc_with_content( + cc_pair=cc_pair_1, + content="Tenant 1 Document Content", + api_key=api_key_1, + ) + cc_pair_1.documents.extend([doc1_tenant1, doc2_tenant1]) + + # Create connectors for Tenant 2 + cc_pair_2: DATestCCPair = CCPairManager.create_from_scratch( + user_performing_action=test_user2, + ) + api_key_2: DATestAPIKey = APIKeyManager.create( + user_performing_action=test_user2, + ) + api_key_2.headers.update(test_user2.headers) + LLMProviderManager.create(user_performing_action=test_user2) + + # Seed documents for Tenant 2 + cc_pair_2.documents = [] + doc1_tenant2 = DocumentManager.seed_doc_with_content( + cc_pair=cc_pair_2, + content="Tenant 2 Document Content", + api_key=api_key_2, + ) + doc2_tenant2 = DocumentManager.seed_doc_with_content( + cc_pair=cc_pair_2, + content="Tenant 2 Document Content", + api_key=api_key_2, + ) + cc_pair_2.documents.extend([doc1_tenant2, doc2_tenant2]) + + tenant1_doc_ids = {doc1_tenant1.id, doc2_tenant1.id} + tenant2_doc_ids = {doc1_tenant2.id, doc2_tenant2.id} + + # Create chat sessions for each user + chat_session1: DATestChatSession = ChatSessionManager.create( + user_performing_action=test_user1 + ) + chat_session2: DATestChatSession = ChatSessionManager.create( + user_performing_action=test_user2 + ) + + # User 1 sends a message and gets a response + response1 = ChatSessionManager.send_message( + chat_session_id=chat_session1.id, + message="What is in Tenant 1's documents?", + user_performing_action=test_user1, + ) + # Assert that the search tool was used + assert response1.tool_name == "run_search" + + response_doc_ids = {doc["document_id"] for doc in response1.tool_result} + assert tenant1_doc_ids.issubset( + response_doc_ids + ), "Not all Tenant 1 document IDs are in the response" + assert not response_doc_ids.intersection( + tenant2_doc_ids + ), "Tenant 2 document IDs should not be in the response" + + # Assert that the contents are correct + for doc in response1.tool_result: + assert doc["content"] == "Tenant 1 Document Content" + + # User 2 sends a message and gets a response + response2 = ChatSessionManager.send_message( + chat_session_id=chat_session2.id, + message="What is in Tenant 2's documents?", + user_performing_action=test_user2, + ) + # Assert that the search tool was used + assert response2.tool_name == "run_search" + # Assert that the tool_result contains Tenant 2's documents + response_doc_ids = {doc["document_id"] for doc in response2.tool_result} + assert tenant2_doc_ids.issubset( + response_doc_ids + ), "Not all Tenant 2 document IDs are in the response" + assert not response_doc_ids.intersection( + tenant1_doc_ids + ), "Tenant 1 document IDs should not be in the response" + # Assert that the contents are correct + for doc in response2.tool_result: + assert doc["content"] == "Tenant 2 Document Content" + + # User 1 tries to access Tenant 2's documents + response_cross = ChatSessionManager.send_message( + chat_session_id=chat_session1.id, + message="What is in Tenant 2's documents?", + user_performing_action=test_user1, + ) + # Assert that the search tool was used + assert response_cross.tool_name == "run_search" + # Assert that the tool_result is empty or does not contain Tenant 2's documents + response_doc_ids = {doc["document_id"] for doc in response_cross.tool_result} + # Ensure none of Tenant 2's document IDs are in the response + assert not response_doc_ids.intersection(tenant2_doc_ids) + # Optionally, assert that tool_result is empty + # assert len(response_cross.tool_result) == 0 + + # User 2 tries to access Tenant 1's documents + response_cross2 = ChatSessionManager.send_message( + chat_session_id=chat_session2.id, + message="What is in Tenant 1's documents?", + user_performing_action=test_user2, + ) + # Assert that the search tool was used + assert response_cross2.tool_name == "run_search" + # Assert that the tool_result is empty or does not contain Tenant 1's documents + response_doc_ids = {doc["document_id"] for doc in response_cross2.tool_result} + # Ensure none of Tenant 1's document IDs are in the response + assert not response_doc_ids.intersection(tenant1_doc_ids) + # Optionally, assert that tool_result is empty + # assert len(response_cross2.tool_result) == 0 diff --git a/backend/tests/integration/multitenant_tests/tenants/test_tenant_creation.py b/backend/tests/integration/multitenant_tests/tenants/test_tenant_creation.py index 4ad19f4da3d..6088743e317 100644 --- a/backend/tests/integration/multitenant_tests/tenants/test_tenant_creation.py +++ b/backend/tests/integration/multitenant_tests/tenants/test_tenant_creation.py @@ -1,5 +1,41 @@ +from danswer.configs.constants import DocumentSource +from danswer.db.enums import AccessType +from danswer.db.models import UserRole +from tests.integration.common_utils.managers.cc_pair import CCPairManager +from tests.integration.common_utils.managers.connector import ConnectorManager +from tests.integration.common_utils.managers.credential import CredentialManager from tests.integration.common_utils.managers.tenant import TenantManager +from tests.integration.common_utils.managers.user import UserManager +from tests.integration.common_utils.test_models import DATestUser +# Test flow from creating tenant to registering as a user def test_tenant_creation(reset_multitenant: None) -> None: TenantManager.create("tenant_dev", "test@test.com") + test_user: DATestUser = UserManager.create(name="test", email="test@test.com") + + assert UserManager.verify_role(test_user, UserRole.ADMIN) + + test_credential = CredentialManager.create( + name="admin_test_credential", + source=DocumentSource.FILE, + curator_public=False, + user_performing_action=test_user, + ) + + test_connector = ConnectorManager.create( + name="admin_test_connector", + source=DocumentSource.FILE, + is_public=False, + user_performing_action=test_user, + ) + + test_cc_pair = CCPairManager.create( + connector_id=test_connector.id, + credential_id=test_credential.id, + name="admin_test_cc_pair", + access_type=AccessType.PRIVATE, + user_performing_action=test_user, + ) + + CCPairManager.verify(cc_pair=test_cc_pair, user_performing_action=test_user) diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml index 109892f9db9..d22bde5b465 100644 --- a/deployment/docker_compose/docker-compose.dev.yml +++ b/deployment/docker_compose/docker-compose.dev.yml @@ -313,7 +313,7 @@ services: - POSTGRES_USER=${POSTGRES_USER:-postgres} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} ports: - - "5433:5432" + - "5432:5432" volumes: - db_volume:/var/lib/postgresql/data diff --git a/deployment/docker_compose/docker-compose.gpu-dev.yml b/deployment/docker_compose/docker-compose.gpu-dev.yml index ea40c2fef0a..a7e0a2afe97 100644 --- a/deployment/docker_compose/docker-compose.gpu-dev.yml +++ b/deployment/docker_compose/docker-compose.gpu-dev.yml @@ -313,7 +313,7 @@ services: - POSTGRES_USER=${POSTGRES_USER:-postgres} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} ports: - - "5433:5432" + - "5432:5432" volumes: - db_volume:/var/lib/postgresql/data diff --git a/deployment/docker_compose/docker-compose.search-testing.yml b/deployment/docker_compose/docker-compose.search-testing.yml index 2afd54e029c..fab950c064e 100644 --- a/deployment/docker_compose/docker-compose.search-testing.yml +++ b/deployment/docker_compose/docker-compose.search-testing.yml @@ -157,7 +157,7 @@ services: - POSTGRES_USER=${POSTGRES_USER:-postgres} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} ports: - - "5433" + - "5432" volumes: - db_volume:/var/lib/postgresql/data From fc484ae2b981cf35ac631f1d41105cc167ecd447 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Sun, 20 Oct 2024 23:09:04 -0700 Subject: [PATCH 05/17] nit --- backend/tests/integration/Dockerfile | 2 +- .../tests/integration/common_utils/reset.py | 25 ++++++++++--------- backend/tests/integration/conftest.py | 3 ++- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/backend/tests/integration/Dockerfile b/backend/tests/integration/Dockerfile index 92a94a32d48..3eecb0d5683 100644 --- a/backend/tests/integration/Dockerfile +++ b/backend/tests/integration/Dockerfile @@ -84,4 +84,4 @@ COPY ./tests/integration /app/tests/integration ENV PYTHONPATH=/app ENTRYPOINT ["pytest", "-s"] -CMD ["/app/tests/integration"] \ No newline at end of file +CMD ["/app/tests/integration", "--ignore=/app/tests/integration/multitenant_tests"] \ No newline at end of file diff --git a/backend/tests/integration/common_utils/reset.py b/backend/tests/integration/common_utils/reset.py index 28f577e0311..ac6771fc048 100644 --- a/backend/tests/integration/common_utils/reset.py +++ b/backend/tests/integration/common_utils/reset.py @@ -265,16 +265,17 @@ def reset_vespa_multitenant() -> None: time.sleep(5) -def reset_all(multitenant: bool = False) -> None: - """Reset both Postgres and Vespa.""" - if multitenant: - logger.info("Resetting Postgres for all tenants...") - reset_postgres_multitenant() - logger.info("Resetting Vespa for all tenants...") - reset_vespa_multitenant() - else: - logger.info("Resetting Postgres...") - reset_postgres() - logger.info("Resetting Vespa...") - reset_vespa() +def reset_all() -> None: + logger.info("Resetting Postgres...") + reset_postgres() + logger.info("Resetting Vespa...") + reset_vespa() + + +def reset_all_multitenant() -> None: + """Reset both Postgres and Vespa for all tenants.""" + logger.info("Resetting Postgres for all tenants...") + reset_postgres_multitenant() + logger.info("Resetting Vespa for all tenants...") + reset_vespa_multitenant() logger.info("Finished resetting all.") diff --git a/backend/tests/integration/conftest.py b/backend/tests/integration/conftest.py index d86d0162ce4..1d57b570d56 100644 --- a/backend/tests/integration/conftest.py +++ b/backend/tests/integration/conftest.py @@ -7,6 +7,7 @@ from danswer.db.engine import get_session_context_manager from danswer.db.search_settings import get_current_search_settings from tests.integration.common_utils.reset import reset_all +from tests.integration.common_utils.reset import reset_all_multitenant from tests.integration.common_utils.vespa import vespa_fixture @@ -48,4 +49,4 @@ def reset() -> None: @pytest.fixture def reset_multitenant() -> None: - reset_all(multitenant=True) + reset_all_multitenant() From c81742901ca68d735cfb8e82f091b95ae370c68c Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Sun, 20 Oct 2024 23:10:21 -0700 Subject: [PATCH 06/17] nit --- backend/danswer/indexing/indexing_pipeline.py | 1 - backend/danswer/server/danswer_api/ingestion.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/backend/danswer/indexing/indexing_pipeline.py b/backend/danswer/indexing/indexing_pipeline.py index 22036d54314..d40bd341fdf 100644 --- a/backend/danswer/indexing/indexing_pipeline.py +++ b/backend/danswer/indexing/indexing_pipeline.py @@ -379,7 +379,6 @@ def build_indexing_pipeline( attempt_id: int | None = None, tenant_id: str | None = None, ) -> IndexingPipelineProtocol: - print("INDEXING PIPELINE") """Builds a pipeline which takes in a list (batch) of docs and indexes them.""" search_settings = get_current_search_settings(db_session) multipass = ( diff --git a/backend/danswer/server/danswer_api/ingestion.py b/backend/danswer/server/danswer_api/ingestion.py index fb94981f19b..9eb2d53569b 100644 --- a/backend/danswer/server/danswer_api/ingestion.py +++ b/backend/danswer/server/danswer_api/ingestion.py @@ -113,8 +113,6 @@ def upsert_ingestion_doc( credential_id=cc_pair.credential_id, ), ) - print("NEW DOC", new_doc) - print("CHUNK COUNT", __chunk_count) # If there's a secondary index being built, index the doc but don't use it for return here if sec_ind_name: From 459a723bebf5274d82ac9f584ab14aee3d2ccf9c Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Wed, 23 Oct 2024 14:43:49 -0700 Subject: [PATCH 07/17] rebase + update --- backend/danswer/server/danswer_api/ingestion.py | 2 +- backend/tests/integration/common_utils/reset.py | 2 +- .../syncing/test_search_permissions.py | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/backend/danswer/server/danswer_api/ingestion.py b/backend/danswer/server/danswer_api/ingestion.py index 9eb2d53569b..bae316535c7 100644 --- a/backend/danswer/server/danswer_api/ingestion.py +++ b/backend/danswer/server/danswer_api/ingestion.py @@ -68,7 +68,7 @@ def upsert_ingestion_doc( doc_info: IngestionDocument, _: User | None = Depends(api_key_dep), db_session: Session = Depends(get_session), - tenant_id=Depends(get_current_tenant_id), + tenant_id: str = Depends(get_current_tenant_id), ) -> IngestionResult: doc_info.document.from_ingestion_api = True diff --git a/backend/tests/integration/common_utils/reset.py b/backend/tests/integration/common_utils/reset.py index ac6771fc048..186e52d7fc5 100644 --- a/backend/tests/integration/common_utils/reset.py +++ b/backend/tests/integration/common_utils/reset.py @@ -7,12 +7,12 @@ from alembic import command from alembic.config import Config -from danswer.background.celery.celery_utils import get_all_tenant_ids from danswer.configs.app_configs import POSTGRES_HOST from danswer.configs.app_configs import POSTGRES_PASSWORD from danswer.configs.app_configs import POSTGRES_PORT from danswer.configs.app_configs import POSTGRES_USER from danswer.db.engine import build_connection_string +from danswer.db.engine import get_all_tenant_ids from danswer.db.engine import get_session_context_manager from danswer.db.engine import get_session_with_tenant from danswer.db.engine import SYNC_DB_API diff --git a/backend/tests/integration/multitenant_tests/syncing/test_search_permissions.py b/backend/tests/integration/multitenant_tests/syncing/test_search_permissions.py index 5281f66376f..47d3c1fbc6c 100644 --- a/backend/tests/integration/multitenant_tests/syncing/test_search_permissions.py +++ b/backend/tests/integration/multitenant_tests/syncing/test_search_permissions.py @@ -91,7 +91,7 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None: # Assert that the search tool was used assert response1.tool_name == "run_search" - response_doc_ids = {doc["document_id"] for doc in response1.tool_result} + response_doc_ids = {doc["document_id"] for doc in response1.tool_result or []} assert tenant1_doc_ids.issubset( response_doc_ids ), "Not all Tenant 1 document IDs are in the response" @@ -100,7 +100,7 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None: ), "Tenant 2 document IDs should not be in the response" # Assert that the contents are correct - for doc in response1.tool_result: + for doc in response1.tool_result or []: assert doc["content"] == "Tenant 1 Document Content" # User 2 sends a message and gets a response @@ -112,7 +112,7 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None: # Assert that the search tool was used assert response2.tool_name == "run_search" # Assert that the tool_result contains Tenant 2's documents - response_doc_ids = {doc["document_id"] for doc in response2.tool_result} + response_doc_ids = {doc["document_id"] for doc in response2.tool_result or []} assert tenant2_doc_ids.issubset( response_doc_ids ), "Not all Tenant 2 document IDs are in the response" @@ -120,7 +120,7 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None: tenant1_doc_ids ), "Tenant 1 document IDs should not be in the response" # Assert that the contents are correct - for doc in response2.tool_result: + for doc in response2.tool_result or []: assert doc["content"] == "Tenant 2 Document Content" # User 1 tries to access Tenant 2's documents @@ -132,7 +132,7 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None: # Assert that the search tool was used assert response_cross.tool_name == "run_search" # Assert that the tool_result is empty or does not contain Tenant 2's documents - response_doc_ids = {doc["document_id"] for doc in response_cross.tool_result} + response_doc_ids = {doc["document_id"] for doc in response_cross.tool_result or []} # Ensure none of Tenant 2's document IDs are in the response assert not response_doc_ids.intersection(tenant2_doc_ids) # Optionally, assert that tool_result is empty @@ -147,7 +147,7 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None: # Assert that the search tool was used assert response_cross2.tool_name == "run_search" # Assert that the tool_result is empty or does not contain Tenant 1's documents - response_doc_ids = {doc["document_id"] for doc in response_cross2.tool_result} + response_doc_ids = {doc["document_id"] for doc in response_cross2.tool_result or []} # Ensure none of Tenant 1's document IDs are in the response assert not response_doc_ids.intersection(tenant1_doc_ids) # Optionally, assert that tool_result is empty From 69ba1abcf2d1214b9e2b78be40bae63770a5b610 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Wed, 23 Oct 2024 14:48:00 -0700 Subject: [PATCH 08/17] nit --- backend/ee/danswer/server/middleware/tenant_tracking.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/ee/danswer/server/middleware/tenant_tracking.py b/backend/ee/danswer/server/middleware/tenant_tracking.py index b43a75b23df..2f507c16772 100644 --- a/backend/ee/danswer/server/middleware/tenant_tracking.py +++ b/backend/ee/danswer/server/middleware/tenant_tracking.py @@ -22,7 +22,6 @@ async def set_tenant_id( ) -> Response: try: logger.info(f"Request route: {request.url.path}") - logger.info(f"Request cookies: {request.cookies}") if not MULTI_TENANT: logger.info("SETITNG TO DEFAULt") tenant_id = POSTGRES_DEFAULT_SCHEMA From 1bc2c3d7f11eac277a3d4f5a0e0024e03350cc12 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Wed, 23 Oct 2024 15:56:53 -0700 Subject: [PATCH 09/17] minior update --- .../multitenant_tests/syncing/test_search_permissions.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/backend/tests/integration/multitenant_tests/syncing/test_search_permissions.py b/backend/tests/integration/multitenant_tests/syncing/test_search_permissions.py index 47d3c1fbc6c..454b02412d4 100644 --- a/backend/tests/integration/multitenant_tests/syncing/test_search_permissions.py +++ b/backend/tests/integration/multitenant_tests/syncing/test_search_permissions.py @@ -135,8 +135,6 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None: response_doc_ids = {doc["document_id"] for doc in response_cross.tool_result or []} # Ensure none of Tenant 2's document IDs are in the response assert not response_doc_ids.intersection(tenant2_doc_ids) - # Optionally, assert that tool_result is empty - # assert len(response_cross.tool_result) == 0 # User 2 tries to access Tenant 1's documents response_cross2 = ChatSessionManager.send_message( @@ -150,5 +148,3 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None: response_doc_ids = {doc["document_id"] for doc in response_cross2.tool_result or []} # Ensure none of Tenant 1's document IDs are in the response assert not response_doc_ids.intersection(tenant1_doc_ids) - # Optionally, assert that tool_result is empty - # assert len(response_cross2.tool_result) == 0 From c8e0ca3d534aee4762d36534bcf2905cb5316be3 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Wed, 23 Oct 2024 17:09:36 -0700 Subject: [PATCH 10/17] k --- .github/workflows/pr-Integration-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-Integration-tests.yml b/.github/workflows/pr-Integration-tests.yml index 597494f4cfb..0085bad1e1f 100644 --- a/.github/workflows/pr-Integration-tests.yml +++ b/.github/workflows/pr-Integration-tests.yml @@ -198,7 +198,7 @@ jobs: danswer/danswer-integration:test \ /app/tests/integration/multitenant_tests continue-on-error: true - id: run_tests + id: run_multitenant_tests - name: Check multi-tenant test results run: | From 7eb5a9d877fc452f5b22f0c55d82206fc94f6d06 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Wed, 23 Oct 2024 17:48:13 -0700 Subject: [PATCH 11/17] minor integration test fixes --- .github/workflows/pr-Integration-tests.yml | 89 +++++++++---------- .../tests/integration/common_utils/reset.py | 4 +- .../tests/dev_apis/test_simple_chat_api.py | 1 + 3 files changed, 47 insertions(+), 47 deletions(-) diff --git a/.github/workflows/pr-Integration-tests.yml b/.github/workflows/pr-Integration-tests.yml index 0085bad1e1f..abb32b327e1 100644 --- a/.github/workflows/pr-Integration-tests.yml +++ b/.github/workflows/pr-Integration-tests.yml @@ -73,6 +73,50 @@ jobs: cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max + # Start containers for multi-tenant tests + - name: Start Docker containers for multi-tenant tests + run: | + cd deployment/docker_compose + ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \ + MULTI_TENANT=true \ + AUTH_TYPE=basic \ # Adjust as needed + REQUIRE_EMAIL_VERIFICATION=false \ + DISABLE_TELEMETRY=true \ + IMAGE_TAG=test \ + docker compose -f docker-compose.dev.yml -p danswer-stack up -d + id: start_docker_multi_tenant + + # In practice, `cloud` Auth type would require OAUTH credentials to be set. + - name: Run Multi-Tenant Integration Tests + run: | + echo "Running integration tests..." + docker run --rm --network danswer-stack_default \ + --name test-runner \ + -e POSTGRES_HOST=relational_db \ + -e POSTGRES_USER=postgres \ + -e POSTGRES_PASSWORD=password \ + -e POSTGRES_DB=postgres \ + -e VESPA_HOST=index \ + -e REDIS_HOST=cache \ + -e API_SERVER_HOST=api_server \ + -e OPENAI_API_KEY=${OPENAI_API_KEY} \ + -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \ + -e TEST_WEB_HOSTNAME=test-runner \ + -e AUTH_TYPE=cloud \ + -e MULTI_TENANT=true \ + danswer/danswer-integration:test \ + /app/tests/integration/multitenant_tests + continue-on-error: true + id: run_multitenant_tests + + - name: Check multi-tenant test results + run: | + if [ ${{ steps.run_tests.outcome }} == 'failure' ]; then + echo "Integration tests failed. Exiting with error." + exit 1 + else + echo "All integration tests passed successfully." + fi - name: Build integration test Docker image uses: ./.github/actions/custom-build-and-push with: @@ -164,51 +208,6 @@ jobs: cd deployment/docker_compose docker compose -f docker-compose.dev.yml -p danswer-stack down -v - # Start containers for multi-tenant tests - - name: Start Docker containers for multi-tenant tests - run: | - cd deployment/docker_compose - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \ - MULTI_TENANT=true \ - AUTH_TYPE=basic \ # Adjust as needed - REQUIRE_EMAIL_VERIFICATION=false \ - DISABLE_TELEMETRY=true \ - IMAGE_TAG=test \ - docker compose -f docker-compose.dev.yml -p danswer-stack up -d - id: start_docker_multi_tenant - - # In practice, `cloud` Auth type would require OAUTH credentials to be set. - - name: Run Multi-Tenant Integration Tests - run: | - echo "Running integration tests..." - docker run --rm --network danswer-stack_default \ - --name test-runner \ - -e POSTGRES_HOST=relational_db \ - -e POSTGRES_USER=postgres \ - -e POSTGRES_PASSWORD=password \ - -e POSTGRES_DB=postgres \ - -e VESPA_HOST=index \ - -e REDIS_HOST=cache \ - -e API_SERVER_HOST=api_server \ - -e OPENAI_API_KEY=${OPENAI_API_KEY} \ - -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \ - -e TEST_WEB_HOSTNAME=test-runner \ - -e AUTH_TYPE=cloud \ - -e MULTI_TENANT=true \ - danswer/danswer-integration:test \ - /app/tests/integration/multitenant_tests - continue-on-error: true - id: run_multitenant_tests - - - name: Check multi-tenant test results - run: | - if [ ${{ steps.run_tests.outcome }} == 'failure' ]; then - echo "Integration tests failed. Exiting with error." - exit 1 - else - echo "All integration tests passed successfully." - fi - - name: Save Docker logs if: success() || failure() run: | diff --git a/backend/tests/integration/common_utils/reset.py b/backend/tests/integration/common_utils/reset.py index 186e52d7fc5..1792af9dbf9 100644 --- a/backend/tests/integration/common_utils/reset.py +++ b/backend/tests/integration/common_utils/reset.py @@ -134,7 +134,7 @@ def reset_postgres( direction="upgrade", revision="head", ) - if setup_danswer: + if not setup_danswer: return # do the same thing as we do on API server startup @@ -215,7 +215,7 @@ def reset_postgres_multitenant() -> None: cur.close() conn.close() - reset_postgres(config_name="schema_private") + reset_postgres(config_name="schema_private", setup_danswer=False) def reset_vespa_multitenant() -> None: diff --git a/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py b/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py index 0a4e7b40b57..32ef99b8f42 100644 --- a/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py +++ b/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py @@ -116,6 +116,7 @@ def test_using_reference_docs_with_simple_with_history_api_flow(reset: None) -> ) assert response.status_code == 200 response_json = response.json() + # get the db_doc_id of the top document to use as a search doc id for second message first_db_doc_id = response_json["top_documents"][0]["db_doc_id"] From 4dcb7b9d2c99d0d98cc9cfe7ca60fa89f5fc44d2 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Wed, 23 Oct 2024 19:07:44 -0700 Subject: [PATCH 12/17] nit --- .github/workflows/pr-Integration-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-Integration-tests.yml b/.github/workflows/pr-Integration-tests.yml index abb32b327e1..e2d5ac04027 100644 --- a/.github/workflows/pr-Integration-tests.yml +++ b/.github/workflows/pr-Integration-tests.yml @@ -79,7 +79,7 @@ jobs: cd deployment/docker_compose ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \ MULTI_TENANT=true \ - AUTH_TYPE=basic \ # Adjust as needed + AUTH_TYPE=basic \ REQUIRE_EMAIL_VERIFICATION=false \ DISABLE_TELEMETRY=true \ IMAGE_TAG=test \ From 306207b173d2618de83cbe6a15e4ee08f13f5a3d Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Wed, 23 Oct 2024 19:21:21 -0700 Subject: [PATCH 13/17] ensure we build test docker image --- .github/workflows/pr-Integration-tests.yml | 25 +++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/.github/workflows/pr-Integration-tests.yml b/.github/workflows/pr-Integration-tests.yml index e2d5ac04027..e8fc69c10bb 100644 --- a/.github/workflows/pr-Integration-tests.yml +++ b/.github/workflows/pr-Integration-tests.yml @@ -72,6 +72,18 @@ jobs: load: true cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max + + - name: Build integration test Docker image + uses: ./.github/actions/custom-build-and-push + with: + context: ./backend + file: ./backend/tests/integration/Dockerfile + platforms: linux/amd64 + tags: danswer/danswer-integration:test + push: false + load: true + cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} + cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max # Start containers for multi-tenant tests - name: Start Docker containers for multi-tenant tests @@ -116,18 +128,7 @@ jobs: exit 1 else echo "All integration tests passed successfully." - fi - - name: Build integration test Docker image - uses: ./.github/actions/custom-build-and-push - with: - context: ./backend - file: ./backend/tests/integration/Dockerfile - platforms: linux/amd64 - tags: danswer/danswer-integration:test - push: false - load: true - cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} - cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max + fi - name: Start Docker containers run: | From 17c4e3e5cd074e2227bfe32655aedb8b18c2b613 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Thu, 24 Oct 2024 16:05:48 -0700 Subject: [PATCH 14/17] remove one space --- .github/workflows/pr-Integration-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-Integration-tests.yml b/.github/workflows/pr-Integration-tests.yml index e8fc69c10bb..5ff5fe8cf38 100644 --- a/.github/workflows/pr-Integration-tests.yml +++ b/.github/workflows/pr-Integration-tests.yml @@ -91,7 +91,7 @@ jobs: cd deployment/docker_compose ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \ MULTI_TENANT=true \ - AUTH_TYPE=basic \ + AUTH_TYPE=basic \ REQUIRE_EMAIL_VERIFICATION=false \ DISABLE_TELEMETRY=true \ IMAGE_TAG=test \ From 6fd6a340e738fcd245e3c4a06becff4a198b2a00 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Thu, 24 Oct 2024 16:08:53 -0700 Subject: [PATCH 15/17] k --- .github/workflows/pr-Integration-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-Integration-tests.yml b/.github/workflows/pr-Integration-tests.yml index 5ff5fe8cf38..9eb06133c7d 100644 --- a/.github/workflows/pr-Integration-tests.yml +++ b/.github/workflows/pr-Integration-tests.yml @@ -130,7 +130,7 @@ jobs: echo "All integration tests passed successfully." fi - - name: Start Docker containers + - name: Start Docker containers run: | cd deployment/docker_compose ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \ From f3bee431c940778596e3e62ca8faf36fdeb537fa Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Thu, 24 Oct 2024 18:40:27 -0700 Subject: [PATCH 16/17] ensure we wipe volumes --- .github/workflows/pr-Integration-tests.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-Integration-tests.yml b/.github/workflows/pr-Integration-tests.yml index 9eb06133c7d..03f702f8854 100644 --- a/.github/workflows/pr-Integration-tests.yml +++ b/.github/workflows/pr-Integration-tests.yml @@ -130,6 +130,12 @@ jobs: echo "All integration tests passed successfully." fi + - name: Stop multi-tenant Docker containers + run: | + cd deployment/docker_compose + docker compose -f docker-compose.dev.yml -p danswer-stack down -v + + - name: Start Docker containers run: | cd deployment/docker_compose @@ -139,7 +145,7 @@ jobs: DISABLE_TELEMETRY=true \ IMAGE_TAG=test \ docker compose -f docker-compose.dev.yml -p danswer-stack up -d - id: start_docker + id: start_dockerstop m - name: Wait for service to be ready run: | From 330868f7f2d7f35fb3bbf360d59c02d80e78fba3 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Thu, 24 Oct 2024 18:59:28 -0700 Subject: [PATCH 17/17] remove log --- backend/ee/danswer/server/middleware/tenant_tracking.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/ee/danswer/server/middleware/tenant_tracking.py b/backend/ee/danswer/server/middleware/tenant_tracking.py index 2f507c16772..ca798e85a70 100644 --- a/backend/ee/danswer/server/middleware/tenant_tracking.py +++ b/backend/ee/danswer/server/middleware/tenant_tracking.py @@ -23,7 +23,6 @@ async def set_tenant_id( try: logger.info(f"Request route: {request.url.path}") if not MULTI_TENANT: - logger.info("SETITNG TO DEFAULt") tenant_id = POSTGRES_DEFAULT_SCHEMA else: token = request.cookies.get("tenant_details")