From 7d5117167f8b94d0b94b7ad962296882141c22d8 Mon Sep 17 00:00:00 2001 From: Weves Date: Tue, 10 Dec 2024 15:38:40 -0800 Subject: [PATCH] Address comments --- backend/danswer/connectors/egnyte/connector.py | 12 ++++++------ backend/danswer/connectors/file/connector.py | 6 +++--- backend/danswer/connectors/interfaces.py | 4 ++-- backend/danswer/file_processing/extract_file_text.py | 4 ++-- backend/danswer/server/documents/standard_oauth.py | 6 ++++-- 5 files changed, 17 insertions(+), 15 deletions(-) diff --git a/backend/danswer/connectors/egnyte/connector.py b/backend/danswer/connectors/egnyte/connector.py index 0b51c79d313..264e837bf94 100644 --- a/backend/danswer/connectors/egnyte/connector.py +++ b/backend/danswer/connectors/egnyte/connector.py @@ -26,11 +26,11 @@ from danswer.connectors.models import ConnectorMissingCredentialError from danswer.connectors.models import Document from danswer.connectors.models import Section -from danswer.file_processing.extract_file_text import check_file_ext_is_valid from danswer.file_processing.extract_file_text import detect_encoding from danswer.file_processing.extract_file_text import extract_file_text from danswer.file_processing.extract_file_text import get_file_ext from danswer.file_processing.extract_file_text import is_text_file_extension +from danswer.file_processing.extract_file_text import is_valid_file_ext from danswer.file_processing.extract_file_text import read_text_file from danswer.utils.logger import setup_logger @@ -99,7 +99,7 @@ def _process_egnyte_file( file_name = file_metadata["name"] extension = get_file_ext(file_name) - if not check_file_ext_is_valid(extension): + if not is_valid_file_ext(extension): logger.warning(f"Skipping file '{file_name}' with extension '{extension}'") return None @@ -170,7 +170,7 @@ def oauth_id(cls) -> DocumentSource: return DocumentSource.EGNYTE @classmethod - def redirect_uri(cls, base_domain: str, state: str) -> str: + def oauth_authorization_url(cls, base_domain: str, state: str) -> str: if not EGNYTE_CLIENT_ID: raise ValueError("EGNYTE_CLIENT_ID environment variable must be set") if not EGNYTE_BASE_DOMAIN: @@ -190,7 +190,7 @@ def redirect_uri(cls, base_domain: str, state: str) -> str: ) @classmethod - def code_to_token(cls, code: str) -> dict[str, Any]: + def oauth_code_to_token(cls, code: str) -> dict[str, Any]: if not EGNYTE_CLIENT_ID: raise ValueError("EGNYTE_CLIENT_ID environment variable must be set") if not EGNYTE_CLIENT_SECRET: @@ -342,8 +342,8 @@ def _process_files( yield current_batch current_batch = [] - except Exception as e: - logger.error(f"Failed to process file {file['path']}: {str(e)}") + except Exception: + logger.exception(f"Failed to process file {file['path']}") continue if current_batch: diff --git a/backend/danswer/connectors/file/connector.py b/backend/danswer/connectors/file/connector.py index b263354822f..70b7219f65a 100644 --- a/backend/danswer/connectors/file/connector.py +++ b/backend/danswer/connectors/file/connector.py @@ -17,11 +17,11 @@ from danswer.connectors.models import Document from danswer.connectors.models import Section from danswer.db.engine import get_session_with_tenant -from danswer.file_processing.extract_file_text import check_file_ext_is_valid from danswer.file_processing.extract_file_text import detect_encoding from danswer.file_processing.extract_file_text import extract_file_text from danswer.file_processing.extract_file_text import get_file_ext from danswer.file_processing.extract_file_text import is_text_file_extension +from danswer.file_processing.extract_file_text import is_valid_file_ext from danswer.file_processing.extract_file_text import load_files_from_zip from danswer.file_processing.extract_file_text import read_pdf_file from danswer.file_processing.extract_file_text import read_text_file @@ -50,7 +50,7 @@ def _read_files_and_metadata( file_content, ignore_dirs=True ): yield os.path.join(directory_path, file_info.filename), file, metadata - elif check_file_ext_is_valid(extension): + elif is_valid_file_ext(extension): yield file_name, file_content, metadata else: logger.warning(f"Skipping file '{file_name}' with extension '{extension}'") @@ -63,7 +63,7 @@ def _process_file( pdf_pass: str | None = None, ) -> list[Document]: extension = get_file_ext(file_name) - if not check_file_ext_is_valid(extension): + if not is_valid_file_ext(extension): logger.warning(f"Skipping file '{file_name}' with extension '{extension}'") return [] diff --git a/backend/danswer/connectors/interfaces.py b/backend/danswer/connectors/interfaces.py index 9dd18acea8b..3ab447a7a88 100644 --- a/backend/danswer/connectors/interfaces.py +++ b/backend/danswer/connectors/interfaces.py @@ -73,12 +73,12 @@ def oauth_id(cls) -> DocumentSource: @classmethod @abc.abstractmethod - def redirect_uri(cls, base_domain: str, state: str) -> str: + def oauth_authorization_url(cls, base_domain: str, state: str) -> str: raise NotImplementedError @classmethod @abc.abstractmethod - def code_to_token(cls, code: str) -> dict[str, Any]: + def oauth_code_to_token(cls, code: str) -> dict[str, Any]: raise NotImplementedError diff --git a/backend/danswer/file_processing/extract_file_text.py b/backend/danswer/file_processing/extract_file_text.py index 428afd9ae52..58016e80d63 100644 --- a/backend/danswer/file_processing/extract_file_text.py +++ b/backend/danswer/file_processing/extract_file_text.py @@ -70,7 +70,7 @@ def get_file_ext(file_path_or_name: str | Path) -> str: return extension -def check_file_ext_is_valid(ext: str) -> bool: +def is_valid_file_ext(ext: str) -> bool: return ext in VALID_FILE_EXTENSIONS @@ -364,7 +364,7 @@ def extract_file_text( elif file_name is not None: final_extension = get_file_ext(file_name) - if check_file_ext_is_valid(final_extension): + if is_valid_file_ext(final_extension): return extension_to_function.get(final_extension, file_io_to_text)(file) # Either the file somehow has no name or the extension is not one that we recognize diff --git a/backend/danswer/server/documents/standard_oauth.py b/backend/danswer/server/documents/standard_oauth.py index 6481ae8e562..4c4b0c497b7 100644 --- a/backend/danswer/server/documents/standard_oauth.py +++ b/backend/danswer/server/documents/standard_oauth.py @@ -86,7 +86,9 @@ def oauth_authorize( ex=_OAUTH_STATE_EXPIRATION_SECONDS, ) - return AuthorizeResponse(redirect_url=connector_cls.redirect_uri(base_url, state)) + return AuthorizeResponse( + redirect_url=connector_cls.oauth_authorization_url(base_url, state) + ) class CallbackResponse(BaseModel): @@ -119,7 +121,7 @@ def oauth_callback( raise HTTPException(status_code=400, detail="Invalid OAuth state") original_url = original_url_bytes.decode("utf-8") - token_info = connector_cls.code_to_token(code) + token_info = connector_cls.oauth_code_to_token(code) # Create a new credential with the token info credential_data = CredentialBase(