Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixed group sync to account for changes in drive permissions #3666

Merged
merged 4 commits into from
Jan 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,12 @@ def _get_permissions_from_slim_doc(
elif permission_type == "anyone":
public = True

drive_id = permission_info.get("drive_id")
group_ids = group_emails | ({drive_id} if drive_id is not None else set())

return ExternalAccess(
external_user_emails=user_emails,
external_user_group_ids=group_emails,
external_user_group_ids=group_ids,
is_public=public,
)

Expand Down
138 changes: 119 additions & 19 deletions backend/ee/onyx/external_permissions/google_drive/group_sync.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,152 @@
from ee.onyx.db.external_perm import ExternalUserGroup
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
from onyx.connectors.google_utils.resources import AdminService
from onyx.connectors.google_utils.resources import get_admin_service
from onyx.connectors.google_utils.resources import get_drive_service
from onyx.db.models import ConnectorCredentialPair
from onyx.utils.logger import setup_logger

logger = setup_logger()


def gdrive_group_sync(
cc_pair: ConnectorCredentialPair,
) -> list[ExternalUserGroup]:
google_drive_connector = GoogleDriveConnector(
**cc_pair.connector.connector_specific_config
)
google_drive_connector.load_credentials(cc_pair.credential.credential_json)
admin_service = get_admin_service(
google_drive_connector.creds, google_drive_connector.primary_admin_email
def _get_drive_members(
google_drive_connector: GoogleDriveConnector,
) -> dict[str, tuple[set[str], set[str]]]:
"""
This builds a map of drive ids to their members (group and user emails).
E.g. {
"drive_id_1": ({"group_email_1"}, {"user_email_1", "user_email_2"}),
"drive_id_2": ({"group_email_3"}, {"user_email_3"}),
}
"""
drive_ids = google_drive_connector.get_all_drive_ids()
hagen-danswer marked this conversation as resolved.
Show resolved Hide resolved

drive_id_to_members_map: dict[str, tuple[set[str], set[str]]] = {}
drive_service = get_drive_service(
google_drive_connector.creds,
google_drive_connector.primary_admin_email,
)

onyx_groups: list[ExternalUserGroup] = []
for drive_id in drive_ids:
group_emails: set[str] = set()
user_emails: set[str] = set()
for permission in execute_paginated_retrieval(
drive_service.permissions().list,
list_key="permissions",
fileId=drive_id,
fields="permissions(emailAddress, type)",
supportsAllDrives=True,
):
if permission["type"] == "group":
group_emails.add(permission["emailAddress"])
elif permission["type"] == "user":
user_emails.add(permission["emailAddress"])
drive_id_to_members_map[drive_id] = (group_emails, user_emails)
return drive_id_to_members_map


def _get_all_groups(
admin_service: AdminService,
google_domain: str,
) -> set[str]:
"""
This gets all the group emails.
"""
group_emails: set[str] = set()
for group in execute_paginated_retrieval(
admin_service.groups().list,
list_key="groups",
domain=google_drive_connector.google_domain,
domain=google_domain,
fields="groups(email)",
):
# The id is the group email
group_email = group["email"]
group_emails.add(group["email"])
return group_emails


# Gather group member emails
group_member_emails: list[str] = []
def _map_group_email_to_member_emails(
admin_service: AdminService,
group_emails: set[str],
) -> dict[str, set[str]]:
"""
This maps group emails to their member emails.
"""
group_to_member_map: dict[str, set[str]] = {}
for group_email in group_emails:
group_member_emails: set[str] = set()
for member in execute_paginated_retrieval(
admin_service.members().list,
list_key="members",
groupKey=group_email,
fields="members(email)",
):
group_member_emails.append(member["email"])
group_member_emails.add(member["email"])

if not group_member_emails:
continue
group_to_member_map[group_email] = group_member_emails
return group_to_member_map


def _build_onyx_groups(
drive_id_to_members_map: dict[str, tuple[set[str], set[str]]],
group_email_to_member_emails_map: dict[str, set[str]],
) -> list[ExternalUserGroup]:
onyx_groups: list[ExternalUserGroup] = []

# Convert all drive member definitions to onyx groups
# This is because having drive level access means you have
# irrevocable access to all the files in the drive.
for drive_id, (group_emails, user_emails) in drive_id_to_members_map.items():
all_member_emails: set[str] = user_emails
for group_email in group_emails:
all_member_emails.update(group_email_to_member_emails_map[group_email])
onyx_groups.append(
ExternalUserGroup(
id=drive_id,
user_emails=list(all_member_emails),
)
)

# Convert all group member definitions to onyx groups
for group_email, member_emails in group_email_to_member_emails_map.items():
onyx_groups.append(
ExternalUserGroup(
id=group_email,
user_emails=list(group_member_emails),
user_emails=list(member_emails),
)
)

return onyx_groups


def gdrive_group_sync(
cc_pair: ConnectorCredentialPair,
) -> list[ExternalUserGroup]:
# Initialize connector and build credential/service objects
google_drive_connector = GoogleDriveConnector(
**cc_pair.connector.connector_specific_config
)
google_drive_connector.load_credentials(cc_pair.credential.credential_json)
admin_service = get_admin_service(
google_drive_connector.creds, google_drive_connector.primary_admin_email
)

# Get all drive members
drive_id_to_members_map = _get_drive_members(google_drive_connector)

# Get all group emails
all_group_emails = _get_all_groups(
admin_service, google_drive_connector.google_domain
)

# Map group emails to their members
group_email_to_member_emails_map = _map_group_email_to_member_emails(
admin_service, all_group_emails
)

# Convert the maps to onyx groups
onyx_groups = _build_onyx_groups(
drive_id_to_members_map=drive_id_to_members_map,
group_email_to_member_emails_map=group_email_to_member_emails_map,
)

return onyx_groups
6 changes: 3 additions & 3 deletions backend/onyx/connectors/google_drive/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ def _get_all_user_emails(self) -> list[str]:
user_emails.append(email)
return user_emails

def _get_all_drive_ids(self) -> set[str]:
def get_all_drive_ids(self) -> set[str]:
primary_drive_service = get_drive_service(
creds=self.creds,
user_email=self.primary_admin_email,
Expand Down Expand Up @@ -353,7 +353,7 @@ def _manage_service_account_retrieval(
) -> Iterator[GoogleDriveFileType]:
all_org_emails: list[str] = self._get_all_user_emails()

all_drive_ids: set[str] = self._get_all_drive_ids()
all_drive_ids: set[str] = self.get_all_drive_ids()

drive_ids_to_retrieve: set[str] = set()
folder_ids_to_retrieve: set[str] = set()
Expand Down Expand Up @@ -437,7 +437,7 @@ def _manage_oauth_retrieval(
# If all 3 are true, we already yielded from get_all_files_for_oauth
return

all_drive_ids = self._get_all_drive_ids()
all_drive_ids = self.get_all_drive_ids()
drive_ids_to_retrieve: set[str] = set()
folder_ids_to_retrieve: set[str] = set()
if self._requested_shared_drive_ids or self._requested_folder_ids:
Expand Down
1 change: 1 addition & 0 deletions backend/onyx/connectors/google_drive/doc_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ def build_slim_document(file: GoogleDriveFileType) -> SlimDocument | None:
id=file["webViewLink"],
perm_sync_data={
"doc_id": file.get("id"),
"drive_id": file.get("driveId"),
"permissions": file.get("permissions", []),
"permission_ids": file.get("permissionIds", []),
"name": file.get("name"),
Expand Down
2 changes: 1 addition & 1 deletion backend/onyx/connectors/google_drive/file_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"shortcutDetails, owners(emailAddress), size)"
)
SLIM_FILE_FIELDS = (
"nextPageToken, files(mimeType, id, name, permissions(emailAddress, type), "
"nextPageToken, files(mimeType, driveId, id, name, permissions(emailAddress, type), "
"permissionIds, webViewLink, owners(emailAddress))"
)
FOLDER_FIELDS = "nextPageToken, files(id, name, permissions, modifiedTime, webViewLink, shortcutDetails)"
Expand Down
Loading