From 7f0cd31a5930be8e840c9d5764855b19bdc47b36 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Mon, 11 Nov 2024 20:47:06 -0600 Subject: [PATCH 1/2] Only print indices that are duplicates --- thicket/utils.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/thicket/utils.py b/thicket/utils.py index 4373cf01..50c160cb 100644 --- a/thicket/utils.py +++ b/thicket/utils.py @@ -3,7 +3,7 @@ # # SPDX-License-Identifier: MIT -from collections import OrderedDict, defaultdict +from collections import Counter, OrderedDict, defaultdict import warnings import numpy as np @@ -72,11 +72,10 @@ def _check_duplicate_inner_idx(df): """Check for duplicate values in the innermost indices.""" for node in set(df.index.get_level_values("node")): inner_idx_values = sorted(df.loc[node].index.tolist()) - inner_idx_values_set = sorted(list(set(inner_idx_values))) - if inner_idx_values != inner_idx_values_set: - raise DuplicateIndexError( - f"Duplicate index {set(inner_idx_values)} found in DataFrame index." - ) + counts = Counter(inner_idx_values) + duplicates = [item for item, count in counts.items() if count > 1] + if len(duplicates) > 0: + raise DuplicateIndexError(f"Duplicate indices found in DataFrame index.\n\t{duplicates}") def _check_missing_hnid(df): """Check if there are missing hatchet nid's.""" From 8445f2a0a3970fe9bfa9625c089066380e38902d Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Tue, 12 Nov 2024 17:38:36 -0600 Subject: [PATCH 2/2] black --- thicket/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/thicket/utils.py b/thicket/utils.py index 50c160cb..16cef3ac 100644 --- a/thicket/utils.py +++ b/thicket/utils.py @@ -75,7 +75,9 @@ def _check_duplicate_inner_idx(df): counts = Counter(inner_idx_values) duplicates = [item for item, count in counts.items() if count > 1] if len(duplicates) > 0: - raise DuplicateIndexError(f"Duplicate indices found in DataFrame index.\n\t{duplicates}") + raise DuplicateIndexError( + f"Duplicate indices found in DataFrame index.\n\t{duplicates}" + ) def _check_missing_hnid(df): """Check if there are missing hatchet nid's."""