Address Leiden numbering issue (#4845)

Our current implementation of Leiden can return non-contiguous cluster IDs; however, there is an unused utility function, [relabel_cluster_ids](https://github.com/rapidsai/cugraph/blob/branch-25.02/cpp/src/community/leiden_impl.cuh#L601:L604), that serves the purpose of relabeling them.

This PR:
- Addresses the Leiden numbering issue from [4791](#4791) by calling `relabel_cluster_ids` after flattening the dendrogram.
- Fixes a bug in the MG Python API of Leiden: the C++ API requires a different seed for each GPU, so each worker is now given its own seed.
- Adds SG and MG C++ tests.
- Adds Python SG and MG tests capturing the numbering issue.

Closes #4791

Authors:
- Joseph Nke (https://github.com/jnke2016)

Approvers:
- Rick Ratzel (https://github.com/rlratzel)
- Chuck Hastings (https://github.com/ChuckHastings)

URL: #4845
rapidsai · Jan 11, 2025 · ed954dc · ed954dc
1 parent a5679f0
commit ed954dc
Show file tree

Hide file tree

Showing 6 changed files with 125 additions and 27 deletions.
diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -604,26 +604,20 @@ void relabel_cluster_ids(raft::handle_t const& handle,
                          size_t num_nodes)
 {
   vertex_t local_cluster_id_first{0};
+
+  // Get unique cluster id and shuffle
+  remove_duplicates<vertex_t, multi_gpu>(handle, unique_cluster_ids);
+
   if constexpr (multi_gpu) {
-    auto unique_cluster_range_lasts = cugraph::partition_manager::compute_partition_range_lasts(
-      handle, static_cast<vertex_t>(unique_cluster_ids.size()));
-
-    auto& comm                 = handle.get_comms();
-    auto const comm_size       = comm.get_size();
-    auto const comm_rank       = comm.get_rank();
-    auto& major_comm           = handle.get_subcomm(cugraph::partition_manager::major_comm_name());
-    auto const major_comm_size = major_comm.get_size();
-    auto const major_comm_rank = major_comm.get_rank();
-    auto& minor_comm           = handle.get_subcomm(cugraph::partition_manager::minor_comm_name());
-    auto const minor_comm_size = minor_comm.get_size();
-    auto const minor_comm_rank = minor_comm.get_rank();
-
-    auto vertex_partition_id =
-      partition_manager::compute_vertex_partition_id_from_graph_subcomm_ranks(
-        major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank);
-
-    local_cluster_id_first =
-      vertex_partition_id == 0 ? vertex_t{0} : unique_cluster_range_lasts[vertex_partition_id - 1];
+    auto cluster_ids_size_per_rank = cugraph::host_scalar_allgather(
+      handle.get_comms(), unique_cluster_ids.size(), handle.get_stream());
+
+    std::vector<vertex_t> cluster_ids_starts(cluster_ids_size_per_rank.size());
+    std::exclusive_scan(cluster_ids_size_per_rank.begin(),
+                        cluster_ids_size_per_rank.end(),
+                        cluster_ids_starts.begin(),
+                        size_t{0});
+    local_cluster_id_first = cluster_ids_starts[handle.get_comms().get_rank()];
   }
 
   rmm::device_uvector<vertex_t> numbering_indices(unique_cluster_ids.size(), handle.get_stream());
@@ -713,6 +707,17 @@ std::pair<size_t, weight_t> leiden(
 
   detail::flatten_leiden_dendrogram(handle, graph_view, *dendrogram, clustering);
 
+  size_t local_num_verts = (*dendrogram).get_level_size_nocheck(0);
+  rmm::device_uvector<vertex_t> unique_cluster_ids(local_num_verts, handle.get_stream());
+
+  thrust::copy(handle.get_thrust_policy(),
+               clustering,
+               clustering + local_num_verts,
+               unique_cluster_ids.begin());
+
+  detail::relabel_cluster_ids<vertex_t, multi_gpu>(
+    handle, unique_cluster_ids, clustering, local_num_verts);
+
   return std::make_pair(dendrogram->num_levels(), modularity);
 }
 

diff --git a/cpp/tests/community/leiden_test.cpp b/cpp/tests/community/leiden_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.  All rights reserved.
  *
  * NVIDIA CORPORATION and its licensors retain all intellectual property
  * and proprietary rights in and to this software, related documentation
@@ -9,6 +9,7 @@
  *
  */
 #include "utilities/base_fixture.hpp"
+#include "utilities/conversion_utilities.hpp"
 #include "utilities/test_graphs.hpp"
 
 #include <cugraph/algorithms.hpp>
@@ -128,6 +129,22 @@ class Tests_Leiden : public ::testing::TestWithParam<std::tuple<Leiden_Usecase,
       ASSERT_FLOAT_EQ(compare_modularity, expected_modularity);
       ASSERT_EQ(level, expected_level);
     }
+
+    auto unique_clustering_v = cugraph::test::sort<vertex_t>(handle, clustering_v);
+
+    unique_clustering_v = cugraph::test::unique<vertex_t>(handle, std::move(unique_clustering_v));
+
+    auto expected_unique_clustering_v =
+      cugraph::test::sequence<int32_t>(handle, unique_clustering_v.size(), size_t{1}, int32_t{0});
+
+    auto h_unique_clustering_v = cugraph::test::to_host(handle, unique_clustering_v);
+    auto h_expected_unique_clustering_v =
+      cugraph::test::to_host(handle, expected_unique_clustering_v);
+
+    ASSERT_TRUE(std::equal(h_unique_clustering_v.begin(),
+                           h_unique_clustering_v.end(),
+                           h_expected_unique_clustering_v.begin()))
+      << "Returned cluster IDs are not numbered consecutively";
   }
 };
 

diff --git a/cpp/tests/community/mg_leiden_test.cpp b/cpp/tests/community/mg_leiden_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -175,6 +175,7 @@ class Tests_MGLeiden
     if (leiden_usecase.check_correctness_) {
       SCOPED_TRACE("compare modularity input");
 
+      // FIXME: The dendrogram is unused
       compare_sg_results<vertex_t, edge_t, weight_t>(*handle_,
                                                      rng_state,
                                                      mg_graph_view,
@@ -184,6 +185,41 @@ class Tests_MGLeiden
                                                      leiden_usecase.theta_,
                                                      mg_modularity);
     }
+
+    // Check numbering
+    vertex_t num_vertices = mg_graph_view.local_vertex_partition_range_size();
+    rmm::device_uvector<vertex_t> clustering_v(num_vertices, handle_->get_stream());
+    cugraph::leiden<vertex_t, edge_t, weight_t, true>(*handle_,
+                                                      rng_state,
+                                                      mg_graph_view,
+                                                      mg_edge_weight_view,
+                                                      clustering_v.data(),
+                                                      leiden_usecase.max_level_,
+                                                      leiden_usecase.resolution_);
+
+    auto unique_clustering_v = cugraph::test::sort<vertex_t>(*handle_, clustering_v);
+
+    unique_clustering_v = cugraph::test::unique<vertex_t>(*handle_, std::move(unique_clustering_v));
+
+    unique_clustering_v = cugraph::test::device_allgatherv(
+      *handle_, unique_clustering_v.data(), unique_clustering_v.size());
+
+    unique_clustering_v = cugraph::test::sort<vertex_t>(*handle_, unique_clustering_v);
+
+    unique_clustering_v = cugraph::test::unique<vertex_t>(*handle_, std::move(unique_clustering_v));
+
+    auto h_unique_clustering_v = cugraph::test::to_host(*handle_, unique_clustering_v);
+
+    auto expected_unique_clustering_v = cugraph::test::sequence<int32_t>(
+      *handle_, unique_clustering_v.size(), size_t{1}, h_unique_clustering_v[0]);
+
+    auto h_expected_unique_clustering_v =
+      cugraph::test::to_host(*handle_, expected_unique_clustering_v);
+
+    ASSERT_TRUE(std::equal(h_unique_clustering_v.begin(),
+                           h_unique_clustering_v.end(),
+                           h_expected_unique_clustering_v.begin()))
+      << "Returned cluster IDs are not numbered consecutively";
   }
 
  private:

diff --git a/python/cugraph/cugraph/dask/community/leiden.py b/python/cugraph/cugraph/dask/community/leiden.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -156,13 +156,13 @@ def leiden(
             input_graph._plc_graph[w],
             max_iter,
             resolution,
-            random_state,
+            (random_state + i) if random_state is not None else random_state,
             theta,
             do_expensive_check,
             workers=[w],
             allow_other_workers=False,
         )
-        for w in Comms.get_workers()
+        for i, w in enumerate(Comms.get_workers())
     ]
 
     wait(result)

diff --git a/python/cugraph/cugraph/tests/community/test_leiden.py b/python/cugraph/cugraph/tests/community/test_leiden.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -19,6 +19,7 @@
 
 import cugraph
 import cudf
+from cudf.testing.testing import assert_series_equal
 from cugraph.testing import utils, UNDIRECTED_DATASETS
 from cugraph.datasets import karate_asymmetric
 
@@ -185,6 +186,18 @@ def test_leiden(graph_file):
     leiden_parts, leiden_mod = cugraph_leiden(G)
     louvain_parts, louvain_mod = cugraph_louvain(G)
 
+    unique_parts = (
+        leiden_parts["partition"]
+        .drop_duplicates()
+        .sort_values(ascending=True)
+        .reset_index(drop=True)
+    )
+
+    idx_col = cudf.Series(unique_parts.index)
+
+    # Ensure Leiden cluster's ID are numbered consecutively
+    assert_series_equal(unique_parts, idx_col, check_dtype=False, check_names=False)
+
     # Leiden modularity score is smaller than Louvain's
     assert leiden_mod >= (0.75 * louvain_mod)
 
@@ -202,6 +215,18 @@ def test_leiden_nx(graph_file):
     leiden_parts, leiden_mod = cugraph_leiden(G)
     louvain_parts, louvain_mod = cugraph_louvain(G)
 
+    unique_parts = (
+        cudf.Series(leiden_parts.values())
+        .drop_duplicates()
+        .sort_values(ascending=True)
+        .reset_index(drop=True)
+    )
+
+    idx_col = cudf.Series(unique_parts.index)
+
+    # Ensure Leiden cluster's ID are numbered consecutively
+    assert_series_equal(unique_parts, idx_col, check_dtype=False, check_names=False)
+
     # Calculating modularity scores for comparison
     # Leiden modularity score is smaller than Louvain's
     assert leiden_mod >= (0.75 * louvain_mod)

diff --git a/python/cugraph/cugraph/tests/community/test_leiden_mg.py b/python/cugraph/cugraph/tests/community/test_leiden_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -16,6 +16,8 @@
 import cugraph
 import cugraph.dask as dcg
 from cugraph.datasets import karate_asymmetric, karate, dolphins
+import cudf
+from cudf.testing.testing import assert_series_equal
 
 
 # =============================================================================
@@ -64,6 +66,19 @@ def test_mg_leiden_with_edgevals_undirected_graph(dask_client, dataset):
     dg = get_mg_graph(dataset, directed=False)
     parts, mod = dcg.leiden(dg)
 
+    unique_parts = (
+        parts["partition"]
+        .compute()
+        .drop_duplicates()
+        .sort_values(ascending=True)
+        .reset_index(drop=True)
+    )
+
+    idx_col = cudf.Series(unique_parts.index)
+
+    # Ensure Leiden cluster's ID are numbered consecutively
+    assert_series_equal(unique_parts, idx_col, check_dtype=False, check_names=False)
+
     # FIXME: either call Nx with the same dataset and compare results, or
     # hardcode golden results to compare to.
     print()