
Commit

update branch
alexbarghi-nv committed Jan 9, 2025
2 parents e1e2d34 + cddd69e commit a21e673
Showing 12 changed files with 547 additions and 86 deletions.
13 changes: 13 additions & 0 deletions .github/workflows/pr.yaml
@@ -13,6 +13,7 @@ jobs:
# Please keep pr-builder as the top job here
pr-builder:
needs:
- check-nightly-ci
- changed-files
- checks
- conda-cpp-build
@@ -42,6 +43,18 @@ jobs:
- name: Telemetry setup
if: ${{ vars.TELEMETRY_ENABLED == 'true' }}
uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main
check-nightly-ci:
# Switch to ubuntu-latest once it defaults to a version of Ubuntu that
# provides at least Python 3.11 (see
# https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat)
runs-on: ubuntu-24.04
env:
RAPIDS_GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- name: Check if nightly CI is passing
uses: rapidsai/shared-actions/check_nightly_success/dispatch@main
with:
repo: cugraph
changed-files:
secrets: inherit
needs: telemetry-setup
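For context on the ubuntu-24.04 pin above: the check_nightly_success action relies on Python 3.11's extended datetime.fromisoformat, which accepts most ISO 8601 variants. A minimal sketch of the difference, assuming the action parses timestamps like the one below (the string is illustrative):

from datetime import datetime

# Accepted on Python >= 3.11; raises ValueError on 3.10 and older, where
# fromisoformat() only understood the exact output of isoformat().
ts = datetime.fromisoformat("2025-01-09T00:00:00Z")
print(ts)  # 2025-01-09 00:00:00+00:00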
46 changes: 22 additions & 24 deletions ci/notebook_list.py
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -17,7 +17,7 @@
import glob
from pathlib import Path

from numba import cuda
from cuda.bindings import runtime

# for adding another run type and skip file name add to this dictionary
runtype_dict = {
@@ -30,20 +30,27 @@

def skip_book_dir(runtype):
# Add all run types here, currently only CI supported
return runtype in runtype_dict and Path(runtype_dict.get(runtype)).is_file()

if runtype in runtype_dict.keys():
if Path(runtype_dict.get(runtype)).is_file():
return True
return False

def _get_cuda_version_string():
status, version = runtime.getLocalRuntimeVersion()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA runtime version.")
major, minor = divmod(version, 1000)
minor //= 10
return f"{major}.{minor}"


def _is_ampere_or_newer():
status, device_id = runtime.cudaGetDevice()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device.")
status, device_prop = runtime.cudaGetDeviceProperties(device_id)
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device properties.")
return (device_prop.major, device_prop.minor) >= (8, 0)

cuda_version_string = ".".join([str(n) for n in cuda.runtime.get_version()])
#
# Not strictly true... however what we mean is
# Pascal or earlier
#
ampere = False
device = cuda.get_current_device()

parser = argparse.ArgumentParser(description="Condition for running the notebook tests")
parser.add_argument("runtype", type=str)
@@ -52,19 +59,10 @@ def skip_book_dir(runtype):

runtype = args.runtype

if runtype not in runtype_dict.keys():
if runtype not in runtype_dict:
print(f"Unknown Run Type = {runtype}", file=sys.stderr)
exit()


# check for the attribute using both pre and post numba 0.53 names
cc = getattr(device, "COMPUTE_CAPABILITY", None) or getattr(
device, "compute_capability"
)
if cc[0] >= 8:
ampere = True

skip = False
for filename in glob.iglob("**/*.ipynb", recursive=True):
skip = False
if skip_book_dir(runtype):
@@ -88,7 +86,7 @@ def skip_book_dir(runtype):
)
skip = True
break
elif ampere and re.search("# Does not run on Ampere", line):
elif _is_ampere_or_newer() and re.search("# Does not run on Ampere", line):
print(f"SKIPPING {filename} (does not run on Ampere)", file=sys.stderr)
skip = True
break
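A note on the new helpers above: every cuda.bindings.runtime call returns a (status, result) tuple instead of raising, and CUDA packs the runtime version as major * 1000 + minor * 10. A minimal sketch of the decoding that _get_cuda_version_string performs:

from cuda.bindings import runtime

status, version = runtime.getLocalRuntimeVersion()
if status != runtime.cudaError_t.cudaSuccess:
    raise RuntimeError("Could not get CUDA runtime version.")

# e.g. version == 12040 for CUDA 12.4
major, minor = divmod(version, 1000)  # 12040 -> (12, 40)
minor //= 10                          # 40 -> 4
print(f"CUDA {major}.{minor}")        # "CUDA 12.4"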
4 changes: 2 additions & 2 deletions cpp/cmake/thirdparty/get_raft.cmake
@@ -1,5 +1,5 @@
#=============================================================================
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -39,7 +39,7 @@ function(find_and_configure_raft)
endif()

rapids_cpm_find(raft ${PKG_VERSION}
GLOBAL_TARGETS raft::raft
GLOBAL_TARGETS raft::raft raft::raft_logger raft::raft_logger_impl
BUILD_EXPORT_SET cugraph-exports
INSTALL_EXPORT_SET cugraph-exports
COMPONENTS ${RAFT_COMPONENTS}
4 changes: 1 addition & 3 deletions cpp/src/c_api/neighbor_sampling.cpp
@@ -948,11 +948,9 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor {
std::exclusive_scan(
recvcounts.begin(), recvcounts.end(), displacements.begin(), size_t{0});

rmm::device_uvector<label_t> tmp_label_to_comm_rank(
label_to_comm_rank = rmm::device_uvector<label_t>(
displacements.back() + recvcounts.back(), handle_.get_stream());

label_to_comm_rank = std::move(tmp_label_to_comm_rank);

cugraph::device_allgatherv(handle_.get_comms(),
local_label_to_comm_rank.begin(),
(*label_to_comm_rank).begin(),
13 changes: 6 additions & 7 deletions python/cugraph/cugraph/dask/common/mg_utils.py
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -13,7 +13,7 @@

import os
import gc
import numba.cuda
from cuda.bindings import runtime


# FIXME: this raft import breaks the library if ucx-py is
@@ -53,11 +53,10 @@ def prepare_worker_to_parts(data, client=None):


def is_single_gpu():
ngpus = len(numba.cuda.gpus)
if ngpus > 1:
return False
else:
return True
status, count = runtime.cudaGetDeviceCount()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device count.")
    return count == 1


def get_visible_devices():
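The status check after each runtime call is repeated verbatim across the files in this commit; a hypothetical helper (not part of this change) could factor the pattern out:

from cuda.bindings import runtime

def _check_cuda(status, msg):
    # Hypothetical helper, not in this commit: raise if a
    # cuda.bindings.runtime call did not return cudaSuccess.
    if status != runtime.cudaError_t.cudaSuccess:
        raise RuntimeError(msg)

status, count = runtime.cudaGetDeviceCount()
_check_cuda(status, "Could not get CUDA device count.")
print(f"{count} visible GPU(s)")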
@@ -1,4 +1,4 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -32,6 +32,20 @@ def get_cudart_version():
return major * 1000 + minor * 10


pytestmark = [
pytest.mark.skipif(
isinstance(torch, MissingModule) or not torch.cuda.is_available(),
reason="PyTorch with GPU support not available",
),
pytest.mark.skipif(
isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available"
),
pytest.mark.skipif(
get_cudart_version() < 11080, reason="not compatible with CUDA < 11.8"
),
]


def runtest(rank: int, world_size: int):
torch.cuda.set_device(rank)

@@ -69,13 +83,6 @@ def runtest(rank: int, world_size: int):


@pytest.mark.sg
@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available")
@pytest.mark.skipif(
isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available"
)
@pytest.mark.skipif(
get_cudart_version() < 11080, reason="not compatible with CUDA < 11.8"
)
def test_feature_storage_wholegraph_backend():
world_size = torch.cuda.device_count()
print("gpu count:", world_size)
@@ -87,13 +94,6 @@ def test_feature_storage_wholegraph_backend():


@pytest.mark.mg
@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available")
@pytest.mark.skipif(
isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available"
)
@pytest.mark.skipif(
get_cudart_version() < 11080, reason="not compatible with CUDA < 11.8"
)
def test_feature_storage_wholegraph_backend_mg():
world_size = torch.cuda.device_count()
print("gpu count:", world_size)
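The module-level pytestmark list added above applies every listed mark to each test collected from the file, which is why the identical per-test skipif decorators shown above are removed. A minimal standalone sketch of the mechanism:

import pytest

# Marks listed here apply to every test in this module.
pytestmark = [pytest.mark.skipif(True, reason="demo: skips all tests below")]

def test_one():
    assert False  # never runs; skipped via pytestmark

def test_two():
    assert False  # also skipped, no per-test decorator needed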
14 changes: 11 additions & 3 deletions python/cugraph/cugraph/tests/docs/test_doctests.py
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -25,14 +25,21 @@
import cugraph
import pylibcugraph
import cudf
from numba import cuda
from cuda.bindings import runtime
from cugraph.testing import utils


modules_to_skip = ["dask", "proto", "raft"]
datasets = utils.RAPIDS_DATASET_ROOT_DIR_PATH

cuda_version_string = ".".join([str(n) for n in cuda.runtime.get_version()])

def _get_cuda_version_string():
status, version = runtime.getLocalRuntimeVersion()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA runtime version.")
major = version // 1000
minor = (version % 1000) // 10
return f"{major}.{minor}"


def _is_public_name(name):
@@ -131,6 +138,7 @@ def skip_docstring(docstring_obj):
NOTE: this function is currently not available on CUDA 11.4 systems.
"""
docstring = docstring_obj.docstring
cuda_version_string = _get_cuda_version_string()
for line in docstring.splitlines():
if f"currently not available on CUDA {cuda_version_string} systems" in line:
return f"docstring example not supported on CUDA {cuda_version_string}"
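skip_docstring() above matches a sentinel sentence against the now lazily computed CUDA version string. A sketch of a docstring it would skip on a CUDA 11.4 system (the function here is hypothetical; only the NOTE sentence matters):

def some_algorithm(G):
    """
    Run some algorithm on graph G.

    NOTE: this function is currently not available on CUDA 11.4 systems.
    """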
3 changes: 1 addition & 2 deletions python/cugraph/cugraph/utilities/path_retrieval_wrapper.pyx
@@ -1,4 +1,4 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
# Copyright (c) 2021-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -19,7 +19,6 @@
from cugraph.utilities.path_retrieval cimport get_traversed_cost as c_get_traversed_cost
from cugraph.structure.graph_primtypes cimport *
from libc.stdint cimport uintptr_t
from numba import cuda
import cudf
import numpy as np

54 changes: 24 additions & 30 deletions python/cugraph/cugraph/utilities/utils.py
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -15,13 +15,10 @@
import os
import shutil

from numba import cuda

import cudf
from cudf.core.column import as_column

from cuda.cudart import cudaDeviceAttr
from rmm._cuda.gpu import getDeviceAttribute
from cuda.bindings import runtime

from warnings import warn

@@ -210,45 +207,42 @@ def get_traversed_path_list(df, id):
return answer


def is_cuda_version_less_than(min_version=(10, 2)):
def is_cuda_version_less_than(min_version):
"""
Returns True if the version of CUDA being used is less than min_version
"""
this_cuda_ver = cuda.runtime.get_version() # returns (<major>, <minor>)
if this_cuda_ver[0] > min_version[0]:
return False
if this_cuda_ver[0] < min_version[0]:
return True
if this_cuda_ver[1] < min_version[1]:
return True
return False
status, version = runtime.getLocalRuntimeVersion()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA runtime version.")
major = version // 1000
minor = (version % 1000) // 10
return (major, minor) < min_version


def is_device_version_less_than(min_version=(7, 0)):
def is_device_version_less_than(min_version):
"""
    Returns True if the device's compute capability is less than min_version
"""
major_version = getDeviceAttribute(
cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, 0
)
minor_version = getDeviceAttribute(
cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, 0
)
if major_version > min_version[0]:
return False
if major_version < min_version[0]:
return True
if minor_version < min_version[1]:
return True
return False
status, device_id = runtime.cudaGetDevice()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device.")
status, device_prop = runtime.cudaGetDeviceProperties(device_id)
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device properties.")
return (device_prop.major, device_prop.minor) < min_version


def get_device_memory_info():
"""
Returns the total amount of global memory on the device in bytes
"""
meminfo = cuda.current_context().get_memory_info()
return meminfo[1]
status, device_id = runtime.cudaGetDevice()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device.")
status, device_prop = runtime.cudaGetDeviceProperties(device_id)
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device properties.")
return device_prop.totalGlobalMem


# FIXME: if G is a Nx type, the weight attribute is assumed to be "weight", if
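The rewritten version checks above rely on Python's lexicographic tuple comparison, which reproduces the old major/minor branching exactly. A quick check:

assert (11, 8) < (12, 0)        # older major version
assert (12, 0) < (12, 4)        # same major, older minor
assert not ((12, 4) < (12, 4))  # equal versions are not "less than"
assert not ((12, 5) < (12, 4))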
