
Commit

update branch
alexbarghi-nv committed Jan 9, 2025
2 parents e1e2d34 + cddd69e commit a21e673
Showing 12 changed files with 547 additions and 86 deletions.
13 changes: 13 additions & 0 deletions .github/workflows/pr.yaml
@@ -13,6 +13,7 @@ jobs:
# Please keep pr-builder as the top job here
pr-builder:
needs:
- check-nightly-ci
- changed-files
- checks
- conda-cpp-build
@@ -42,6 +43,18 @@ jobs:
- name: Telemetry setup
if: ${{ vars.TELEMETRY_ENABLED == 'true' }}
uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main
check-nightly-ci:
# Switch to ubuntu-latest once it defaults to a version of Ubuntu that
# provides at least Python 3.11 (see
# https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat)
runs-on: ubuntu-24.04
env:
RAPIDS_GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- name: Check if nightly CI is passing
uses: rapidsai/shared-actions/check_nightly_success/dispatch@main
with:
repo: cugraph
changed-files:
secrets: inherit
needs: telemetry-setup
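For context on the ubuntu-24.04 pin above: the check_nightly_success action relies on Python 3.11's extended datetime.fromisoformat, which accepts most ISO 8601 variants. A minimal sketch of the difference, assuming the action parses timestamps like the one below (the string is illustrative):

from datetime import datetime

# Accepted on Python >= 3.11; raises ValueError on 3.10 and older, where
# fromisoformat() only understood the exact output of isoformat().
ts = datetime.fromisoformat("2025-01-09T00:00:00Z")
print(ts)  # 2025-01-09 00:00:00+00:00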
46 changes: 22 additions & 24 deletions ci/notebook_list.py
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -17,7 +17,7 @@
import glob
from pathlib import Path

from numba import cuda
from cuda.bindings import runtime

# for adding another run type and skip file name add to this dictionary
runtype_dict = {
@@ -30,20 +30,27 @@

def skip_book_dir(runtype):
# Add all run types here, currently only CI supported
return runtype in runtype_dict and Path(runtype_dict.get(runtype)).is_file()

if runtype in runtype_dict.keys():
if Path(runtype_dict.get(runtype)).is_file():
return True
return False

def _get_cuda_version_string():
status, version = runtime.getLocalRuntimeVersion()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA runtime version.")
major, minor = divmod(version, 1000)
minor //= 10
return f"{major}.{minor}"


def _is_ampere_or_newer():
status, device_id = runtime.cudaGetDevice()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device.")
status, device_prop = runtime.cudaGetDeviceProperties(device_id)
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device properties.")
return (device_prop.major, device_prop.minor) >= (8, 0)

cuda_version_string = ".".join([str(n) for n in cuda.runtime.get_version()])
#
# Not strictly true... however what we mean is
# Pascal or earlier
#
ampere = False
device = cuda.get_current_device()

parser = argparse.ArgumentParser(description="Condition for running the notebook tests")
parser.add_argument("runtype", type=str)
@@ -52,19 +59,10 @@ def skip_book_dir(runtype):

runtype = args.runtype

if runtype not in runtype_dict.keys():
if runtype not in runtype_dict:
print(f"Unknown Run Type = {runtype}", file=sys.stderr)
exit()


# check for the attribute using both pre and post numba 0.53 names
cc = getattr(device, "COMPUTE_CAPABILITY", None) or getattr(
device, "compute_capability"
)
if cc[0] >= 8:
ampere = True

skip = False
for filename in glob.iglob("**/*.ipynb", recursive=True):
skip = False
if skip_book_dir(runtype):
@@ -88,7 +86,7 @@ def skip_book_dir(runtype):
)
skip = True
break
elif ampere and re.search("# Does not run on Ampere", line):
elif _is_ampere_or_newer() and re.search("# Does not run on Ampere", line):
print(f"SKIPPING {filename} (does not run on Ampere)", file=sys.stderr)
skip = True
break
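A note on the new helpers above: every cuda.bindings.runtime call returns a (status, result) tuple instead of raising, and CUDA packs the runtime version as major * 1000 + minor * 10. A minimal sketch of the decoding that _get_cuda_version_string performs:

from cuda.bindings import runtime

status, version = runtime.getLocalRuntimeVersion()
if status != runtime.cudaError_t.cudaSuccess:
    raise RuntimeError("Could not get CUDA runtime version.")

# e.g. version == 12040 for CUDA 12.4
major, minor = divmod(version, 1000)  # 12040 -> (12, 40)
minor //= 10                          # 40 -> 4
print(f"CUDA {major}.{minor}")        # "CUDA 12.4"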
4 changes: 2 additions & 2 deletions cpp/cmake/thirdparty/get_raft.cmake
@@ -1,5 +1,5 @@
#=============================================================================
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -39,7 +39,7 @@ function(find_and_configure_raft)
endif()

rapids_cpm_find(raft ${PKG_VERSION}
GLOBAL_TARGETS raft::raft
GLOBAL_TARGETS raft::raft raft::raft_logger raft::raft_logger_impl
BUILD_EXPORT_SET cugraph-exports
INSTALL_EXPORT_SET cugraph-exports
COMPONENTS ${RAFT_COMPONENTS}
4 changes: 1 addition & 3 deletions cpp/src/c_api/neighbor_sampling.cpp
@@ -948,11 +948,9 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor {
std::exclusive_scan(
recvcounts.begin(), recvcounts.end(), displacements.begin(), size_t{0});

rmm::device_uvector<label_t> tmp_label_to_comm_rank(
label_to_comm_rank = rmm::device_uvector<label_t>(
displacements.back() + recvcounts.back(), handle_.get_stream());

label_to_comm_rank = std::move(tmp_label_to_comm_rank);

cugraph::device_allgatherv(handle_.get_comms(),
local_label_to_comm_rank.begin(),
(*label_to_comm_rank).begin(),
13 changes: 6 additions & 7 deletions python/cugraph/cugraph/dask/common/mg_utils.py
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -13,7 +13,7 @@

import os
import gc
import numba.cuda
from cuda.bindings import runtime


# FIXME: this raft import breaks the library if ucx-py is
@@ -53,11 +53,10 @@ def prepare_worker_to_parts(data, client=None):


def is_single_gpu():
ngpus = len(numba.cuda.gpus)
if ngpus > 1:
return False
else:
return True
status, count = runtime.cudaGetDeviceCount()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device count.")
    return count == 1


def get_visible_devices():
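The status check after each runtime call is repeated verbatim across the files in this commit; a hypothetical helper (not part of this change) could factor the pattern out:

from cuda.bindings import runtime

def _check_cuda(status, msg):
    # Hypothetical helper, not in this commit: raise if a
    # cuda.bindings.runtime call did not return cudaSuccess.
    if status != runtime.cudaError_t.cudaSuccess:
        raise RuntimeError(msg)

status, count = runtime.cudaGetDeviceCount()
_check_cuda(status, "Could not get CUDA device count.")
print(f"{count} visible GPU(s)")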
@@ -1,4 +1,4 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -32,6 +32,20 @@ def get_cudart_version():
return major * 1000 + minor * 10


pytestmark = [
pytest.mark.skipif(
isinstance(torch, MissingModule) or not torch.cuda.is_available(),
reason="PyTorch with GPU support not available",
),
pytest.mark.skipif(
isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available"
),
pytest.mark.skipif(
get_cudart_version() < 11080, reason="not compatible with CUDA < 11.8"
),
]


def runtest(rank: int, world_size: int):
torch.cuda.set_device(rank)

@@ -69,13 +83,6 @@ def runtest(rank: int, world_size: int):


@pytest.mark.sg
@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available")
@pytest.mark.skipif(
isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available"
)
@pytest.mark.skipif(
get_cudart_version() < 11080, reason="not compatible with CUDA < 11.8"
)
def test_feature_storage_wholegraph_backend():
world_size = torch.cuda.device_count()
print("gpu count:", world_size)
@@ -87,13 +94,6 @@ def test_feature_storage_wholegraph_backend():


@pytest.mark.mg
@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available")
@pytest.mark.skipif(
isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available"
)
@pytest.mark.skipif(
get_cudart_version() < 11080, reason="not compatible with CUDA < 11.8"
)
def test_feature_storage_wholegraph_backend_mg():
world_size = torch.cuda.device_count()
print("gpu count:", world_size)
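The module-level pytestmark list added above applies every listed mark to each test collected from the file, which is why the identical per-test skipif decorators shown above are removed. A minimal standalone sketch of the mechanism:

import pytest

# Marks listed here apply to every test in this module.
pytestmark = [pytest.mark.skipif(True, reason="demo: skips all tests below")]

def test_one():
    assert False  # never runs; skipped via pytestmark

def test_two():
    assert False  # also skipped, no per-test decorator needed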
14 changes: 11 additions & 3 deletions python/cugraph/cugraph/tests/docs/test_doctests.py
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -25,14 +25,21 @@
import cugraph
import pylibcugraph
import cudf
from numba import cuda
from cuda.bindings import runtime
from cugraph.testing import utils


modules_to_skip = ["dask", "proto", "raft"]
datasets = utils.RAPIDS_DATASET_ROOT_DIR_PATH

cuda_version_string = ".".join([str(n) for n in cuda.runtime.get_version()])

def _get_cuda_version_string():
status, version = runtime.getLocalRuntimeVersion()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA runtime version.")
major = version // 1000
minor = (version % 1000) // 10
return f"{major}.{minor}"


def _is_public_name(name):
@@ -131,6 +138,7 @@ def skip_docstring(docstring_obj):
NOTE: this function is currently not available on CUDA 11.4 systems.
"""
docstring = docstring_obj.docstring
cuda_version_string = _get_cuda_version_string()
for line in docstring.splitlines():
if f"currently not available on CUDA {cuda_version_string} systems" in line:
return f"docstring example not supported on CUDA {cuda_version_string}"
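skip_docstring() above matches a sentinel sentence against the now lazily computed CUDA version string. A sketch of a docstring it would skip on a CUDA 11.4 system (the function here is hypothetical; only the NOTE sentence matters):

def some_algorithm(G):
    """
    Run some algorithm on graph G.

    NOTE: this function is currently not available on CUDA 11.4 systems.
    """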
3 changes: 1 addition & 2 deletions python/cugraph/cugraph/utilities/path_retrieval_wrapper.pyx
@@ -1,4 +1,4 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
# Copyright (c) 2021-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -19,7 +19,6 @@
from cugraph.utilities.path_retrieval cimport get_traversed_cost as c_get_traversed_cost
from cugraph.structure.graph_primtypes cimport *
from libc.stdint cimport uintptr_t
from numba import cuda
import cudf
import numpy as np

54 changes: 24 additions & 30 deletions python/cugraph/cugraph/utilities/utils.py
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -15,13 +15,10 @@
import os
import shutil

from numba import cuda

import cudf
from cudf.core.column import as_column

from cuda.cudart import cudaDeviceAttr
from rmm._cuda.gpu import getDeviceAttribute
from cuda.bindings import runtime

from warnings import warn

@@ -210,45 +207,42 @@ def get_traversed_path_list(df, id):
return answer


def is_cuda_version_less_than(min_version=(10, 2)):
def is_cuda_version_less_than(min_version):
"""
Returns True if the version of CUDA being used is less than min_version
"""
this_cuda_ver = cuda.runtime.get_version() # returns (<major>, <minor>)
if this_cuda_ver[0] > min_version[0]:
return False
if this_cuda_ver[0] < min_version[0]:
return True
if this_cuda_ver[1] < min_version[1]:
return True
return False
status, version = runtime.getLocalRuntimeVersion()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA runtime version.")
major = version // 1000
minor = (version % 1000) // 10
return (major, minor) < min_version


def is_device_version_less_than(min_version=(7, 0)):
def is_device_version_less_than(min_version):
"""
    Returns True if the device's compute capability is less than min_version
"""
major_version = getDeviceAttribute(
cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, 0
)
minor_version = getDeviceAttribute(
cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, 0
)
if major_version > min_version[0]:
return False
if major_version < min_version[0]:
return True
if minor_version < min_version[1]:
return True
return False
status, device_id = runtime.cudaGetDevice()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device.")
status, device_prop = runtime.cudaGetDeviceProperties(device_id)
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device properties.")
return (device_prop.major, device_prop.minor) < min_version


def get_device_memory_info():
"""
Returns the total amount of global memory on the device in bytes
"""
meminfo = cuda.current_context().get_memory_info()
return meminfo[1]
status, device_id = runtime.cudaGetDevice()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device.")
status, device_prop = runtime.cudaGetDeviceProperties(device_id)
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device properties.")
return device_prop.totalGlobalMem


# FIXME: if G is a Nx type, the weight attribute is assumed to be "weight", if
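The rewritten version checks above rely on Python's lexicographic tuple comparison, which reproduces the old major/minor branching exactly. A quick check:

assert (11, 8) < (12, 0)        # older major version
assert (12, 0) < (12, 4)        # same major, older minor
assert not ((12, 4) < (12, 4))  # equal versions are not "less than"
assert not ((12, 5) < (12, 4))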
