Skip to content

Commit

Permalink
Sheet update (onyx-dot-app#3189)
Browse files Browse the repository at this point in the history
* quick pass

* k

* update sheet

* add multiple sheet stuff

* k

* finalized

* update configuration
  • Loading branch information
pablonyx authored and ahmadassaf committed Nov 25, 2024
1 parent d199eaa commit 47d9248
Showing 1 changed file with 63 additions and 0 deletions.
63 changes: 63 additions & 0 deletions backend/danswer/connectors/google_drive/doc_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from datetime import datetime
from datetime import timezone

from googleapiclient.discovery import build # type: ignore
from googleapiclient.errors import HttpError # type: ignore

from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
Expand Down Expand Up @@ -48,6 +49,67 @@ def _extract_sections_basic(
return [Section(link=link, text=UNSUPPORTED_FILE_TYPE_CONTENT)]

try:
if mime_type == GDriveMimeType.SPREADSHEET.value:
try:
sheets_service = build(
"sheets", "v4", credentials=service._http.credentials
)
spreadsheet = (
sheets_service.spreadsheets()
.get(spreadsheetId=file["id"])
.execute()
)

sections = []
for sheet in spreadsheet["sheets"]:
sheet_name = sheet["properties"]["title"]
sheet_id = sheet["properties"]["sheetId"]

# Get sheet dimensions, defaulting to 1000 rows x 26 columns when gridProperties is absent
grid_properties = sheet["properties"].get("gridProperties", {})
row_count = grid_properties.get("rowCount", 1000)
column_count = grid_properties.get("columnCount", 26)

# Convert the 1-based column count to A1-notation column letters (e.g., 26 -> Z, 27 -> AA)
end_column = ""
while column_count:
column_count, remainder = divmod(column_count - 1, 26)
end_column = chr(65 + remainder) + end_column

range_name = f"'{sheet_name}'!A1:{end_column}{row_count}"

try:
result = (
sheets_service.spreadsheets()
.values()
.get(spreadsheetId=file["id"], range=range_name)
.execute()
)
values = result.get("values", [])

if values:
text = f"Sheet: {sheet_name}\n"
for row in values:
text += "\t".join(str(cell) for cell in row) + "\n"
sections.append(
Section(
link=f"{link}#gid={sheet_id}",
text=text,
)
)
except HttpError as e:
logger.warning(
f"Error fetching data for sheet '{sheet_name}': {e}"
)
continue
return sections

except Exception as e:
logger.warning(
f"Ran into exception '{e}' when pulling data from Google Sheet '{file['name']}'."
" Falling back to basic extraction."
)

if mime_type in [
GDriveMimeType.DOC.value,
GDriveMimeType.PPT.value,
Expand All @@ -65,6 +127,7 @@ def _extract_sections_basic(
.decode("utf-8")
)
return [Section(link=link, text=text)]

elif mime_type in [
GDriveMimeType.PLAIN_TEXT.value,
GDriveMimeType.MARKDOWN.value,
Expand Down

0 comments on commit 47d9248

Please sign in to comment.