From acd35c585aacd5c5faa6d56e3539aa7d2dfc56c9 Mon Sep 17 00:00:00 2001 From: Raymond Douglass Date: Wed, 27 Jan 2021 13:52:59 -0500 Subject: [PATCH 01/51] DOC v0.19 Updates --- CHANGELOG.md | 8 ++++++++ conda/environments/cugraph_dev_cuda10.1.yml | 16 ++++++++-------- conda/environments/cugraph_dev_cuda10.2.yml | 16 ++++++++-------- conda/environments/cugraph_dev_cuda11.0.yml | 16 ++++++++-------- cpp/CMakeLists.txt | 2 +- docs/source/conf.py | 4 ++-- 6 files changed, 35 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b73fd539c3d..548f0dae747 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +# cuGraph 0.19.0 (Date TBD) + +## New Features + +## Improvements + +## Bug Fixes + # cuGraph 0.18.0 (Date TBD) diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 067fd0bc4ba..f6f2a65e3f3 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -5,17 +5,17 @@ channels: - rapidsai-nightly - conda-forge dependencies: -- cudf=0.18.* -- libcudf=0.18.* -- rmm=0.18.* -- cuxfilter=0.18.* -- librmm=0.18.* +- cudf=0.19.* +- libcudf=0.19.* +- rmm=0.19.* +- cuxfilter=0.19.* +- librmm=0.19.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.18* -- dask-cudf=0.18* +- dask-cuda=0.19* +- dask-cudf=0.19* - nccl>=2.7 -- ucx-py=0.18* +- ucx-py=0.19* - ucx-proc=*=gpu - scipy - networkx diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index 3371340d8bd..a070819064b 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -5,17 +5,17 @@ channels: - rapidsai-nightly - conda-forge dependencies: -- cudf=0.18.* -- libcudf=0.18.* -- rmm=0.18.* -- cuxfilter=0.18.* -- librmm=0.18.* +- cudf=0.19.* +- libcudf=0.19.* +- rmm=0.19.* +- cuxfilter=0.19.* +- librmm=0.19.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.18* -- dask-cudf=0.18* +- dask-cuda=0.19* +- dask-cudf=0.19* - nccl>=2.7 -- ucx-py=0.18* +- ucx-py=0.19* - ucx-proc=*=gpu - scipy - networkx diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index ee3b57632a1..3421152af06 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -5,17 +5,17 @@ channels: - rapidsai-nightly - conda-forge dependencies: -- cudf=0.18.* -- libcudf=0.18.* -- rmm=0.18.* -- cuxfilter=0.18.* -- librmm=0.18.* +- cudf=0.19.* +- libcudf=0.19.* +- rmm=0.19.* +- cuxfilter=0.19.* +- librmm=0.19.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.18* -- dask-cudf=0.18* +- dask-cuda=0.19* +- dask-cudf=0.19* - nccl>=2.7 -- ucx-py=0.18* +- ucx-py=0.19* - ucx-proc=*=gpu - scipy - networkx diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 7871ce7581b..2c9c2918e54 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -16,7 +16,7 @@ cmake_minimum_required(VERSION 3.12..3.17 FATAL_ERROR) -project(CUGRAPH VERSION 0.18.0 LANGUAGES C CXX CUDA) +project(CUGRAPH VERSION 0.19.0 LANGUAGES C CXX CUDA) ################################################################################################### # - build type ------------------------------------------------------------------------------------ diff --git a/docs/source/conf.py b/docs/source/conf.py index 6b484a5f57b..eb4745a61f0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -80,9 +80,9 @@ # built documents. # # The short X.Y version. 
-version = '0.18' +version = '0.19' # The full version, including alpha/beta/rc tags. -release = '0.18.0' +release = '0.19.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. From 369beee0f912e87bd8272ae500bc56711319f6d6 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Mon, 22 Feb 2021 15:42:16 -0500 Subject: [PATCH 02/51] Rename sort_and_shuffle to groupby_gpuid_and_shuffle (#1392) Rename to better reflect what this function should do than how it is currently implemented (which can change in the future for better performance). Authors: - Seunghwa Kang (@seunghwak) Approvers: - Chuck Hastings (@ChuckHastings) - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1392 --- ...ransform_reduce_key_aggregated_out_nbr.cuh | 4 ++-- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 2 +- cpp/include/utilities/cython.hpp | 3 ++- cpp/include/utilities/shuffle_comm.cuh | 22 +++++++++---------- cpp/src/experimental/coarsen_graph.cu | 4 ++-- cpp/src/experimental/relabel.cu | 4 ++-- cpp/src/experimental/renumber_edgelist.cu | 18 +++++++-------- cpp/src/utilities/cython.cu | 5 +++-- python/cugraph/structure/graph_primtypes.pxd | 2 +- 9 files changed, 33 insertions(+), 31 deletions(-) diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 774f6d08bf4..785f8197aff 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -234,7 +234,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( rmm::device_uvector rx_unique_keys(0, handle.get_stream()); std::vector rx_value_counts{}; - std::tie(rx_unique_keys, rx_value_counts) = sort_and_shuffle_values( + std::tie(rx_unique_keys, rx_value_counts) = groupby_gpuid_and_shuffle_values( comm, unique_keys.begin(), unique_keys.end(), @@ -372,7 +372,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( rmm::device_uvector rx_key_aggregated_edge_weights(0, handle.get_stream()); std::forward_as_tuple( std::tie(rx_major_vertices, rx_minor_keys, rx_key_aggregated_edge_weights), std::ignore) = - sort_and_shuffle_values( + groupby_gpuid_and_shuffle_values( sub_comm, triplet_first, triplet_first + tmp_major_vertices.size(), diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index 7ffd17faec9..70b6dc92752 100644 --- a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -241,7 +241,7 @@ transform_reduce_by_adj_matrix_row_col_key_e( rmm::device_uvector rx_unique_keys(0, handle.get_stream()); auto rx_value_for_unique_key_buffer = allocate_dataframe_buffer(0, handle.get_stream()); std::tie(rx_unique_keys, rx_value_for_unique_key_buffer, std::ignore) = - sort_and_shuffle_kv_pairs( + groupby_gpuid_and_shuffle_kv_pairs( comm, unique_keys.begin(), unique_keys.end(), diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index a22553777e2..e94190897b8 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -451,7 +451,8 @@ std::unique_ptr call_egonet(raft::handle_t const& handle, template std::unique_ptr> call_shuffle( raft::handle_t const& handle, - vertex_t* edgelist_major_vertices, // [IN 
/ OUT]: sort_and_shuffle_values() sorts in-place + vertex_t* + edgelist_major_vertices, // [IN / OUT]: groupby_gpuid_and_shuffle_values() sorts in-place vertex_t* edgelist_minor_vertices, // [IN / OUT] weight_t* edgelist_weights, // [IN / OUT] edge_t num_edgelist_edges, diff --git a/cpp/include/utilities/shuffle_comm.cuh b/cpp/include/utilities/shuffle_comm.cuh index 05fe51184ca..7e04c7e1972 100644 --- a/cpp/include/utilities/shuffle_comm.cuh +++ b/cpp/include/utilities/shuffle_comm.cuh @@ -232,11 +232,11 @@ auto shuffle_values(raft::comms::comms_t const &comm, } template -auto sort_and_shuffle_values(raft::comms::comms_t const &comm, - ValueIterator tx_value_first /* [INOUT */, - ValueIterator tx_value_last /* [INOUT */, - ValueToGPUIdOp value_to_gpu_id_op, - cudaStream_t stream) +auto groupby_gpuid_and_shuffle_values(raft::comms::comms_t const &comm, + ValueIterator tx_value_first /* [INOUT */, + ValueIterator tx_value_last /* [INOUT */, + ValueToGPUIdOp value_to_gpu_id_op, + cudaStream_t stream) { auto const comm_size = comm.get_size(); @@ -275,12 +275,12 @@ auto sort_and_shuffle_values(raft::comms::comms_t const &comm, } template -auto sort_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, - VertexIterator tx_key_first /* [INOUT */, - VertexIterator tx_key_last /* [INOUT */, - ValueIterator tx_value_first /* [INOUT */, - KeyToGPUIdOp key_to_gpu_id_op, - cudaStream_t stream) +auto groupby_gpuid_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, + VertexIterator tx_key_first /* [INOUT */, + VertexIterator tx_key_last /* [INOUT */, + ValueIterator tx_value_first /* [INOUT */, + KeyToGPUIdOp key_to_gpu_id_op, + cudaStream_t stream) { auto d_tx_value_counts = detail::sort_and_count( comm, tx_key_first, tx_key_last, tx_value_first, key_to_gpu_id_op, stream); diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 02b0c388b31..0cd551b0d73 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -329,7 +329,7 @@ coarsen_graph( std::forward_as_tuple( std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights), std::ignore) = - sort_and_shuffle_values( + groupby_gpuid_and_shuffle_values( handle.get_comms(), edge_first, edge_first + coarsened_edgelist_major_vertices.size(), @@ -371,7 +371,7 @@ coarsen_graph( handle.get_stream()); rmm::device_uvector rx_unique_labels(0, handle.get_stream()); - std::tie(rx_unique_labels, std::ignore) = sort_and_shuffle_values( + std::tie(rx_unique_labels, std::ignore) = groupby_gpuid_and_shuffle_values( handle.get_comms(), unique_labels.begin(), unique_labels.end(), diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index 4a36cac180d..62bd6951f71 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -106,7 +106,7 @@ void relabel(raft::handle_t const& handle, thrust::make_tuple(label_pair_old_labels.begin(), label_pair_new_labels.begin())); std::forward_as_tuple(std::tie(rx_label_pair_old_labels, rx_label_pair_new_labels), std::ignore) = - sort_and_shuffle_values( + groupby_gpuid_and_shuffle_values( handle.get_comms(), pair_first, pair_first + num_label_pairs, @@ -142,7 +142,7 @@ void relabel(raft::handle_t const& handle, { rmm::device_uvector rx_unique_old_labels(0, handle.get_stream()); std::vector rx_value_counts{}; - std::tie(rx_unique_old_labels, rx_value_counts) = sort_and_shuffle_values( + std::tie(rx_unique_old_labels, rx_value_counts) = groupby_gpuid_and_shuffle_values( 
handle.get_comms(), unique_old_labels.begin(), unique_old_labels.end(), diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index 1f9a5a573fa..6a5a1c732c2 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -151,14 +151,14 @@ rmm::device_uvector compute_renumber_map( auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(labels.begin(), counts.begin())); rmm::device_uvector rx_labels(0, handle.get_stream()); rmm::device_uvector rx_counts(0, handle.get_stream()); - std::forward_as_tuple(std::tie(rx_labels, rx_counts), std::ignore) = sort_and_shuffle_values( - comm, - pair_first, - pair_first + labels.size(), - [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__(auto val) { - return key_func(thrust::get<0>(val)); - }, - handle.get_stream()); + std::forward_as_tuple(std::tie(rx_labels, rx_counts), std::ignore) = + groupby_gpuid_and_shuffle_values( + comm, + pair_first, + pair_first + labels.size(), + [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__( + auto val) { return key_func(thrust::get<0>(val)); }, + handle.get_stream()); labels.resize(rx_labels.size(), handle.get_stream()); counts.resize(labels.size(), handle.get_stream()); @@ -309,7 +309,7 @@ void expensive_check_edgelist( handle.get_stream()); rmm::device_uvector rx_unique_edge_vertices(0, handle.get_stream()); - std::tie(rx_unique_edge_vertices, std::ignore) = sort_and_shuffle_values( + std::tie(rx_unique_edge_vertices, std::ignore) = groupby_gpuid_and_shuffle_values( handle.get_comms(), unique_edge_vertices.begin(), unique_edge_vertices.end(), diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index ce7b45c1c08..e95a001cb91 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -749,7 +749,8 @@ void call_sssp(raft::handle_t const& handle, template std::unique_ptr> call_shuffle( raft::handle_t const& handle, - vertex_t* edgelist_major_vertices, // [IN / OUT]: sort_and_shuffle_values() sorts in-place + vertex_t* + edgelist_major_vertices, // [IN / OUT]: groupby_gpuid_and_shuffle_values() sorts in-place vertex_t* edgelist_minor_vertices, // [IN / OUT] weight_t* edgelist_weights, // [IN / OUT] edge_t num_edgelist_edges, @@ -770,7 +771,7 @@ std::unique_ptr> call_shuffle( std::forward_as_tuple( std::tie(ptr_ret->get_major(), ptr_ret->get_minor(), ptr_ret->get_weights()), std::ignore) = - cugraph::experimental::sort_and_shuffle_values( + cugraph::experimental::groupby_gpuid_and_shuffle_values( comm, // handle.get_comms(), zip_edge, zip_edge + num_edgelist_edges, diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index 1a2891494ff..07132df2598 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -200,7 +200,7 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": vertex_t get_part_matrix_partition_minor_first() vertex_t get_part_matrix_partition_minor_last() -# 4. `sort_and_shuffle_values()` wrapper: +# 4. 
`groupby_gpuid_and_shuffle_values()` wrapper: # cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": From 89bffa5d5a593eb7e0c3d6b9c319a3cfa217877a Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Tue, 23 Feb 2021 16:34:33 +0100 Subject: [PATCH 03/51] ENH Build with `cmake --build` & Pass ccache variables to conda recipe & use Ninja in CI (#1415) - Build using `cmake --build` - Add ccache env variables to conda recipe - Use Ninja in CI Authors: - Jordan Jacobelli (@Ethyling) Approvers: - Dillon Cullinan (@dillon-cullinan) URL: https://github.com/rapidsai/cugraph/pull/1415 --- build.sh | 8 ++++---- ci/cpu/build.sh | 5 ++++- conda/recipes/libcugraph/meta.yaml | 7 +++++++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/build.sh b/build.sh index ef210e841c6..54634e2ca6e 100755 --- a/build.sh +++ b/build.sh @@ -46,7 +46,7 @@ CUGRAPH_BUILD_DIR=${REPODIR}/python/build BUILD_DIRS="${LIBCUGRAPH_BUILD_DIR} ${CUGRAPH_BUILD_DIR}" # Set defaults for vars modified by flags to this script -VERBOSE="" +VERBOSE_FLAG="" BUILD_TYPE=Release INSTALL_TARGET=install BUILD_DISABLE_DEPRECATION_WARNING=ON @@ -86,7 +86,7 @@ fi # Process flags if hasArg -v; then - VERBOSE=1 + VERBOSE_FLAG="-v" fi if hasArg -g; then BUILD_TYPE=Debug @@ -143,7 +143,7 @@ if buildAll || hasArg libcugraph; then -DBUILD_STATIC_FAISS=${BUILD_STATIC_FAISS} \ -DBUILD_CUGRAPH_MG_TESTS=${BUILD_CPP_MG_TESTS} \ ${REPODIR}/cpp - make -j${PARALLEL_LEVEL} VERBOSE=${VERBOSE} ${INSTALL_TARGET} + cmake --build "${LIBCUGRAPH_BUILD_DIR}" -j${PARALLEL_LEVEL} --target ${INSTALL_TARGET} ${VERBOSE_FLAG} fi # Build and install the cugraph Python package @@ -169,7 +169,7 @@ if buildAll || hasArg docs; then -DBUILD_STATIC_FAISS=${BUILD_STATIC_FAISS} fi cd ${LIBCUGRAPH_BUILD_DIR} - make -j${PARALLEL_LEVEL} VERBOSE=${VERBOSE} docs_cugraph + cmake --build "${LIBCUGRAPH_BUILD_DIR}" -j${PARALLEL_LEVEL} --target docs_cugraph ${VERBOSE_FLAG} cd ${REPODIR}/docs make html fi diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 2c6dc899be2..d69448cda4e 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. 
######################################### # cuGraph CPU conda build script for CI # ######################################### @@ -24,6 +24,9 @@ fi export GPUCI_CONDA_RETRY_MAX=1 export GPUCI_CONDA_RETRY_SLEEP=30 +# Use Ninja to build +export CMAKE_GENERATOR="Ninja" + ################################################################################ # SETUP - Check environment ################################################################################ diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index cd83e5a9b7a..8f7495eab3c 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -21,6 +21,13 @@ build: - CUDAHOSTCXX - PARALLEL_LEVEL - VERSION_SUFFIX + - CCACHE_DIR + - CCACHE_NOHASHDIR + - CCACHE_COMPILERCHECK + - CMAKE_GENERATOR + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_CUDA_COMPILER_LAUNCHER requirements: build: From 06ac713c5e5700185abe28fbc261c84e2b7165a8 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 25 Feb 2021 17:08:15 -0500 Subject: [PATCH 04/51] Matching updates for RAFT comms updates (device_sendrecv, device_multicast_sendrecv, gather, gatherv) (#1391) - [x] Update cuGraph to use RAFT::comms_t's newly added device_sendrecv & device_multicast_sendrecv) - [x] Update cuGraph to use RAFT::comms_t's newly added gather & gatherv - [x] Update RAFT git tag once https://github.com/rapidsai/raft/pull/114 (currently merged in 0.18 but is not merged to 0.19) and https://github.com/rapidsai/raft/pull/144 are merged to 0.19 Ready for review but cannot be merged till RAFT PR 114 and 144 are merged to RAFT branch-0.19. Authors: - Seunghwa Kang (@seunghwak) Approvers: - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1391 --- cpp/CMakeLists.txt | 2 +- cpp/include/utilities/device_comm.cuh | 55 +++++++--------------- cpp/include/utilities/host_scalar_comm.cuh | 10 ---- 3 files changed, 17 insertions(+), 50 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b2d537edaa2..d211fe9ed5a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -298,7 +298,7 @@ else(DEFINED ENV{RAFT_PATH}) FetchContent_Declare( raft GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG 4a79adcb0c0e87964dcdc9b9122f242b5235b702 + GIT_TAG a3461b201ea1c9f61571f1927274f739e775d2d2 SOURCE_SUBDIR raft ) diff --git a/cpp/include/utilities/device_comm.cuh b/cpp/include/utilities/device_comm.cuh index 8c3b0f86a47..24b9147ce3c 100644 --- a/cpp/include/utilities/device_comm.cuh +++ b/cpp/include/utilities/device_comm.cuh @@ -196,21 +196,13 @@ device_sendrecv_impl(raft::comms::comms_t const& comm, using value_type = typename std::iterator_traits::value_type; static_assert( std::is_same::value_type, value_type>::value); - // ncclSend/ncclRecv pair needs to be located inside ncclGroupStart/ncclGroupEnd to avoid deadlock - ncclGroupStart(); - ncclSend(iter_to_raw_ptr(input_first), - tx_count * sizeof(value_type), - ncclUint8, - dst, - comm.get_nccl_comm(), - stream); - ncclRecv(iter_to_raw_ptr(output_first), - rx_count * sizeof(value_type), - ncclUint8, - src, - comm.get_nccl_comm(), - stream); - ncclGroupEnd(); + comm.device_sendrecv(iter_to_raw_ptr(input_first), + tx_count, + dst, + iter_to_raw_ptr(output_first), + rx_count, + src, + stream); } template @@ -288,25 +280,15 @@ device_multicast_sendrecv_impl(raft::comms::comms_t const& comm, using value_type = typename std::iterator_traits::value_type; static_assert( 
std::is_same::value_type, value_type>::value); - // ncclSend/ncclRecv pair needs to be located inside ncclGroupStart/ncclGroupEnd to avoid deadlock - ncclGroupStart(); - for (size_t i = 0; i < tx_counts.size(); ++i) { - ncclSend(iter_to_raw_ptr(input_first + tx_offsets[i]), - tx_counts[i] * sizeof(value_type), - ncclUint8, - tx_dst_ranks[i], - comm.get_nccl_comm(), - stream); - } - for (size_t i = 0; i < rx_counts.size(); ++i) { - ncclRecv(iter_to_raw_ptr(output_first + rx_offsets[i]), - rx_counts[i] * sizeof(value_type), - ncclUint8, - rx_src_ranks[i], - comm.get_nccl_comm(), - stream); - } - ncclGroupEnd(); + comm.device_multicast_sendrecv(iter_to_raw_ptr(input_first), + tx_counts, + tx_offsets, + tx_dst_ranks, + iter_to_raw_ptr(output_first), + rx_counts, + rx_offsets, + rx_src_ranks, + stream); } template @@ -589,10 +571,6 @@ device_gatherv_impl(raft::comms::comms_t const& comm, { static_assert(std::is_same::value_type, typename std::iterator_traits::value_type>::value); - // FIXME: should be enabled once the RAFT gather & gatherv PR is merged -#if 1 - CUGRAPH_FAIL("Unimplemented."); -#else comm.gatherv(iter_to_raw_ptr(input_first), iter_to_raw_ptr(output_first), sendcount, @@ -600,7 +578,6 @@ device_gatherv_impl(raft::comms::comms_t const& comm, displacements.data(), root, stream); -#endif } template diff --git a/cpp/include/utilities/host_scalar_comm.cuh b/cpp/include/utilities/host_scalar_comm.cuh index dda0ce1f091..2ecfd913813 100644 --- a/cpp/include/utilities/host_scalar_comm.cuh +++ b/cpp/include/utilities/host_scalar_comm.cuh @@ -321,16 +321,11 @@ std::enable_if_t::value, std::vector> host_scalar_gathe &input, 1, stream); - // FIXME: should be enabled once the RAFT gather & gatherv PR is merged -#if 1 - CUGRAPH_FAIL("Unimplemented."); -#else comm.gather(comm.get_rank() == root ? d_outputs.data() + comm.get_rank() : d_outputs.data(), d_outputs.data(), size_t{1}, root, stream); -#endif std::vector h_outputs(comm.get_rank() == root ? comm.get_size() : 0); if (comm.get_rank() == root) { raft::update_host(h_outputs.data(), d_outputs.data(), comm.get_size(), stream); @@ -358,10 +353,6 @@ host_scalar_gather(raft::comms::comms_t const& comm, T input, int root, cudaStre h_tuple_scalar_elements.data(), tuple_size, stream); - // FIXME: should be enabled once the RAFT gather & gatherv PR is merged -#if 1 - CUGRAPH_FAIL("Unimplemented."); -#else comm.gather(comm.get_rank() == root ? d_gathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size : d_gathered_tuple_scalar_elements.data(), @@ -369,7 +360,6 @@ host_scalar_gather(raft::comms::comms_t const& comm, T input, int root, cudaStre tuple_size, root, stream); -#endif std::vector h_gathered_tuple_scalar_elements( comm.get_rank() == root ? 
comm.get_size() * tuple_size : size_t{0}); if (comm.get_rank() == root) { From 99d1328922b03a96734cf7b520263af66347e55c Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 25 Feb 2021 17:21:27 -0500 Subject: [PATCH 05/51] Adding new primitives: copy_v_transform_reduce_key_aggregated_out_nbr & transform_reduce_by_adj_matrix_row|col_key_e bug fixes (#1399) bug fixes Authors: - Seunghwa Kang (@seunghwak) Approvers: - Chuck Hastings (@ChuckHastings) - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1399 --- ...ransform_reduce_key_aggregated_out_nbr.cuh | 20 +-- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 128 ++++++++---------- cpp/include/utilities/device_comm.cuh | 8 +- 3 files changed, 75 insertions(+), 81 deletions(-) diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 785f8197aff..8490df1d17d 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -18,8 +18,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -100,10 +102,10 @@ __global__ void for_all_major_for_all_nbr_low_degree( } thrust::fill(thrust::seq, major_vertices + local_offset, - major_vertices + local_offset + key_idx, + major_vertices + local_offset + key_idx + 1, matrix_partition.get_major_from_major_offset_nocheck(major_offset)); thrust::fill(thrust::seq, - major_vertices + local_offset + key_idx, + major_vertices + local_offset + key_idx + 1, major_vertices + local_offset + local_degree, invalid_vertex); } @@ -159,8 +161,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( * pairs provided by @p map_key_first, @p map_key_last, and @p map_value_first (aggregated over the * entire set of processes in multi-GPU). * @param reduce_op Binary operator takes two input arguments and reduce the two variables to one. - * @param init Initial value to be added to the reduced @p key_aggregated_e_op return values for - * each vertex. + * @param init Initial value to be added to the reduced @p reduce_op return values for each vertex. * @param vertex_value_output_first Iterator pointing to the vertex property variables for the * first (inclusive) vertex (assigned to tihs process in multi-GPU). 
`vertex_value_output_last` * (exclusive) is deduced as @p vertex_value_output_first + @p @@ -191,6 +192,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( "GraphViewType should support the push model."); static_assert(std::is_same::value_type, typename GraphViewType::vertex_type>::value); + static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; @@ -393,7 +395,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), triplet_first, - triplet_first + major_vertices.size(), + triplet_first + tmp_major_vertices.size(), tmp_e_op_result_buffer_first, [adj_matrix_row_value_input_first, key_aggregated_e_op, @@ -408,7 +410,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( w, *(adj_matrix_row_value_input_first + matrix_partition.get_major_offset_from_major_nocheck(major)), - kv_map.find(key)->second); + kv_map.find(key)->second.load(cuda::std::memory_order_relaxed)); }); tmp_minor_keys.resize(0, handle.get_stream()); tmp_key_aggregated_edge_weights.resize(0, handle.get_stream()); @@ -488,11 +490,12 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( auto major_vertex_first = thrust::make_transform_iterator( thrust::make_counting_iterator(size_t{0}), [major_vertices = major_vertices.data()] __device__(auto i) { - return ((i == 0) || (major_vertices[i] == major_vertices[i - 1])) + return ((i == 0) || (major_vertices[i] != major_vertices[i - 1])) ? major_vertices[i] : invalid_vertex_id::value; }); thrust::copy_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), major_vertex_first, major_vertex_first + major_vertices.size(), unique_major_vertices.begin(), @@ -506,9 +509,10 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( thrust::make_permutation_iterator( vertex_value_output_first, thrust::make_transform_iterator( - major_vertices.begin(), + unique_major_vertices.begin(), [vertex_partition = vertex_partition_device_t(graph_view)] __device__( auto v) { return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); })), + thrust::equal_to{}, reduce_op); thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index 70b6dc92752..0b3588bc8c5 100644 --- a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -17,6 +17,8 @@ #include #include +#include +#include #include #include #include @@ -124,6 +126,35 @@ __global__ void for_all_major_for_all_nbr_low_degree( } } +// FIXME: better derive value_t from BufferType +template +std::tuple, BufferType> reduce_to_unique_kv_pairs( + rmm::device_uvector&& keys, BufferType&& value_buffer, cudaStream_t stream) +{ + thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), + keys.begin(), + keys.end(), + get_dataframe_buffer_begin(value_buffer)); + auto num_uniques = + thrust::count_if(rmm::exec_policy(stream)->on(stream), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(keys.size()), + [keys = keys.data()] __device__(auto i) { + return ((i == 0) || (keys[i] != keys[i - 1])) ? 
true : false; + }); + + rmm::device_uvector unique_keys(num_uniques, stream); + auto value_for_unique_key_buffer = allocate_dataframe_buffer(unique_keys.size(), stream); + thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + keys.begin(), + keys.end(), + get_dataframe_buffer_begin(value_buffer), + unique_keys.begin(), + get_dataframe_buffer_begin(value_for_unique_key_buffer)); + + return std::make_tuple(std::move(unique_keys), std::move(value_for_unique_key_buffer)); +} + template (tmp_value_buffer)); } + std::tie(tmp_keys, tmp_value_buffer) = reduce_to_unique_kv_pairs( + std::move(tmp_keys), std::move(tmp_value_buffer), handle.get_stream()); if (GraphViewType::is_multi_gpu) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); - thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - tmp_keys.begin(), - tmp_keys.end(), - get_dataframe_buffer_begin(tmp_value_buffer)); - - auto num_uniques = - thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(tmp_keys.size()), - [tmp_keys = tmp_keys.data()] __device__(auto i) { - return ((i == 0) || (tmp_keys[i] != tmp_keys[i - 1])) ? true : false; - }); - rmm::device_uvector unique_keys(num_uniques, handle.get_stream()); - auto value_for_unique_key_buffer = - allocate_dataframe_buffer(unique_keys.size(), handle.get_stream()); - - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - tmp_keys.begin(), - tmp_keys.end(), - get_dataframe_buffer_begin(tmp_value_buffer), - unique_keys.begin(), - get_dataframe_buffer_begin(value_for_unique_key_buffer)); - rmm::device_uvector rx_unique_keys(0, handle.get_stream()); auto rx_value_for_unique_key_buffer = allocate_dataframe_buffer(0, handle.get_stream()); std::tie(rx_unique_keys, rx_value_for_unique_key_buffer, std::ignore) = groupby_gpuid_and_shuffle_kv_pairs( comm, - unique_keys.begin(), - unique_keys.end(), - get_dataframe_buffer_begin(value_for_unique_key_buffer), + tmp_keys.begin(), + tmp_keys.end(), + get_dataframe_buffer_begin(tmp_value_buffer), [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__( auto val) { return key_func(val); }, handle.get_stream()); - // FIXME: we can reduce after shuffle - - tmp_keys = std::move(rx_unique_keys); - tmp_value_buffer = std::move(rx_value_for_unique_key_buffer); + std::tie(tmp_keys, tmp_value_buffer) = reduce_to_unique_kv_pairs( + std::move(rx_unique_keys), std::move(rx_value_for_unique_key_buffer), handle.get_stream()); } auto cur_size = keys.size(); - // FIXME: this can lead to frequent costly reallocation; we may be able to avoid this if we can - // reserve address space to avoid expensive reallocation. 
- // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management - keys.resize(cur_size + tmp_keys.size(), handle.get_stream()); - resize_dataframe_buffer(value_buffer, keys.size(), handle.get_stream()); - - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - tmp_keys.begin(), - tmp_keys.end(), - keys.begin() + cur_size); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - get_dataframe_buffer_begin(tmp_value_buffer), - get_dataframe_buffer_begin(tmp_value_buffer) + tmp_keys.size(), - get_dataframe_buffer_begin(value_buffer) + cur_size); + if (cur_size == 0) { + keys = std::move(tmp_keys); + value_buffer = std::move(tmp_value_buffer); + } else { + // FIXME: this can lead to frequent costly reallocation; we may be able to avoid this if we + // can reserve address space to avoid expensive reallocation. + // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management + keys.resize(cur_size + tmp_keys.size(), handle.get_stream()); + resize_dataframe_buffer(value_buffer, keys.size(), handle.get_stream()); + + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + tmp_keys.begin(), + tmp_keys.end(), + keys.begin() + cur_size); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + get_dataframe_buffer_begin(tmp_value_buffer), + get_dataframe_buffer_begin(tmp_value_buffer) + tmp_keys.size(), + get_dataframe_buffer_begin(value_buffer) + cur_size); + } } if (GraphViewType::is_multi_gpu) { - thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - keys.begin(), - keys.end(), - get_dataframe_buffer_begin(value_buffer)); - - auto num_uniques = - thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(keys.size()), - [keys = keys.data()] __device__(auto i) { - return ((i == 0) || (keys[i] != keys[i - 1])) ? 
true : false; - }); - rmm::device_uvector unique_keys(num_uniques, handle.get_stream()); - auto value_for_unique_key_buffer = - allocate_dataframe_buffer(unique_keys.size(), handle.get_stream()); - - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - keys.begin(), - keys.end(), - get_dataframe_buffer_begin(value_buffer), - unique_keys.begin(), - get_dataframe_buffer_begin(value_for_unique_key_buffer)); - - keys = std::move(unique_keys); - value_buffer = std::move(value_for_unique_key_buffer); + std::tie(keys, value_buffer) = reduce_to_unique_kv_pairs( + std::move(keys), std::move(value_buffer), handle.get_stream()); } // FIXME: add init diff --git a/cpp/include/utilities/device_comm.cuh b/cpp/include/utilities/device_comm.cuh index 24b9147ce3c..7b9956902cc 100644 --- a/cpp/include/utilities/device_comm.cuh +++ b/cpp/include/utilities/device_comm.cuh @@ -973,10 +973,10 @@ device_gatherv(raft::comms::comms_t const& comm, size_t constexpr tuple_size = thrust::tuple_size::value_type>::value; - detail::device_allgatherv_tuple_iterator_element_impl() + detail::device_gatherv_tuple_iterator_element_impl() .run(comm, input_first, output_first, sendcount, recvcounts, displacements, root, stream); } From 55896052e05f4e1d27def51391458cb08c3516ca Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 25 Feb 2021 17:21:52 -0500 Subject: [PATCH 06/51] Add new primitives: compute_in|out_degrees, compute_in|out_weight_sums to graph_view_t (#1394) Close https://github.com/rapidsai/cugraph/issues/1208 - [x] add compute_in|out_degrees, compute_in|out_weight_sums - [x] replace PageRank's custom code to compute out-weight-sums to use graph_view_t's compute_out_weight_sums - [x] add SG C++ tests Authors: - Seunghwa Kang (@seunghwak) Approvers: - Chuck Hastings (@ChuckHastings) - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1394 --- .../experimental/detail/graph_utils.cuh | 22 +- cpp/include/experimental/graph_view.hpp | 12 + cpp/include/utilities/shuffle_comm.cuh | 4 +- cpp/src/experimental/graph.cu | 2 +- cpp/src/experimental/graph_view.cu | 228 +++++++++++++++++- cpp/src/experimental/pagerank.cu | 20 +- cpp/src/experimental/renumber_edgelist.cu | 4 +- cpp/tests/CMakeLists.txt | 20 ++ cpp/tests/experimental/degree_test.cpp | 165 +++++++++++++ cpp/tests/experimental/weight_sum_test.cpp | 186 ++++++++++++++ 10 files changed, 637 insertions(+), 26 deletions(-) create mode 100644 cpp/tests/experimental/degree_test.cpp create mode 100644 cpp/tests/experimental/weight_sum_test.cpp diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index 3ac2e2163c6..084d68b8ba4 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -39,7 +40,7 @@ namespace detail { // compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed = // false) or columns (of the graph adjacency matrix, if store_transposed = true) template -rmm::device_uvector compute_major_degree( +rmm::device_uvector compute_major_degrees( raft::handle_t const &handle, std::vector const &adj_matrix_partition_offsets, partition_t const &partition) @@ -120,7 +121,7 @@ rmm::device_uvector compute_major_degree( // compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed = // false) or columns (of the graph adjacency 
matrix, if store_transposed = true) template -rmm::device_uvector compute_major_degree( +rmm::device_uvector compute_major_degrees( raft::handle_t const &handle, std::vector> const &adj_matrix_partition_offsets, partition_t const &partition) @@ -131,7 +132,22 @@ rmm::device_uvector compute_major_degree( adj_matrix_partition_offsets.end(), tmp_offsets.begin(), [](auto const &offsets) { return offsets.data(); }); - return compute_major_degree(handle, tmp_offsets, partition); + return compute_major_degrees(handle, tmp_offsets, partition); +} + +// compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed = +// false) or columns (of the graph adjacency matrix, if store_transposed = true) +template +rmm::device_uvector compute_major_degrees(raft::handle_t const &handle, + edge_t const *offsets, + vertex_t number_of_vertices) +{ + rmm::device_uvector degrees(number_of_vertices, handle.get_stream()); + thrust::tabulate(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + degrees.begin(), + degrees.end(), + [offsets] __device__(auto i) { return offsets[i + 1] - offsets[i]; }); + return degrees; } template diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index d2ae1150970..7598841fc1a 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -494,6 +494,12 @@ class graph_view_t(nullptr); } + rmm::device_uvector compute_in_degrees(raft::handle_t const& handle) const; + rmm::device_uvector compute_out_degrees(raft::handle_t const& handle) const; + + rmm::device_uvector compute_in_weight_sums(raft::handle_t const& handle) const; + rmm::device_uvector compute_out_weight_sums(raft::handle_t const& handle) const; + private: std::vector adj_matrix_partition_offsets_{}; std::vector adj_matrix_partition_indices_{}; @@ -638,6 +644,12 @@ class graph_view_t compute_in_degrees(raft::handle_t const& handle) const; + rmm::device_uvector compute_out_degrees(raft::handle_t const& handle) const; + + rmm::device_uvector compute_in_weight_sums(raft::handle_t const& handle) const; + rmm::device_uvector compute_out_weight_sums(raft::handle_t const& handle) const; + private: edge_t const* offsets_{nullptr}; vertex_t const* indices_{nullptr}; diff --git a/cpp/include/utilities/shuffle_comm.cuh b/cpp/include/utilities/shuffle_comm.cuh index 7e04c7e1972..da86f76b11d 100644 --- a/cpp/include/utilities/shuffle_comm.cuh +++ b/cpp/include/utilities/shuffle_comm.cuh @@ -69,7 +69,7 @@ rmm::device_uvector sort_and_count(raft::comms::comms_t const &comm, d_tx_value_counts = std::move(d_counts); } - return std::move(d_tx_value_counts); + return d_tx_value_counts; } template @@ -111,7 +111,7 @@ rmm::device_uvector sort_and_count(raft::comms::comms_t const &comm, d_tx_value_counts = std::move(d_counts); } - return std::move(d_tx_value_counts); + return d_tx_value_counts; } // inline to suppress a complaint about ODR violation diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 5cf393bfce4..498bb4eaefe 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -278,7 +278,7 @@ graph_tget_handle_ptr()), adj_matrix_partition_offsets_, partition_); // optional expensive checks (part 2/3) diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index df92fd94194..f443608e424 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include 
#include @@ -70,6 +71,83 @@ std::vector update_adj_matrix_partition_edge_counts( return adj_matrix_partition_edge_counts; } +template +rmm::device_uvector compute_minor_degrees( + raft::handle_t const& handle, + graph_view_t const& graph_view) +{ + rmm::device_uvector minor_degrees(graph_view.get_number_of_local_vertices(), + handle.get_stream()); + if (store_transposed) { + copy_v_transform_reduce_out_nbr( + handle, + graph_view, + thrust::make_constant_iterator(0) /* dummy */, + thrust::make_constant_iterator(0) /* dummy */, + [] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) { + return edge_t{1}; + }, + edge_t{0}, + minor_degrees.data()); + } else { + copy_v_transform_reduce_in_nbr( + handle, + graph_view, + thrust::make_constant_iterator(0) /* dummy */, + thrust::make_constant_iterator(0) /* dummy */, + [] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) { + return edge_t{1}; + }, + edge_t{0}, + minor_degrees.data()); + } + + return minor_degrees; +} + +template +rmm::device_uvector compute_weight_sums( + raft::handle_t const& handle, + graph_view_t const& graph_view) +{ + rmm::device_uvector weight_sums(graph_view.get_number_of_local_vertices(), + handle.get_stream()); + if (major == store_transposed) { + copy_v_transform_reduce_in_nbr( + handle, + graph_view, + thrust::make_constant_iterator(0) /* dummy */, + thrust::make_constant_iterator(0) /* dummy */, + [] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) { + return w; + }, + weight_t{0.0}, + weight_sums.data()); + } else { + copy_v_transform_reduce_out_nbr( + handle, + graph_view, + thrust::make_constant_iterator(0) /* dummy */, + thrust::make_constant_iterator(0) /* dummy */, + [] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) { + return w; + }, + weight_t{0.0}, + weight_sums.data()); + } + + return weight_sums; +} + } // namespace template on(default_stream), degrees.begin(), @@ -301,6 +379,154 @@ graph_view_t +rmm::device_uvector +graph_view_t>:: + compute_in_degrees(raft::handle_t const& handle) const +{ + if (store_transposed) { + return detail::compute_major_degrees( + handle, this->adj_matrix_partition_offsets_, this->partition_); + } else { + return compute_minor_degrees(handle, *this); + } +} + +template +rmm::device_uvector +graph_view_t>::compute_in_degrees(raft::handle_t const& handle) const +{ + if (store_transposed) { + return detail::compute_major_degrees( + handle, this->offsets_, this->get_number_of_local_vertices()); + } else { + return compute_minor_degrees(handle, *this); + } +} + +template +rmm::device_uvector +graph_view_t>:: + compute_out_degrees(raft::handle_t const& handle) const +{ + if (store_transposed) { + return compute_minor_degrees(handle, *this); + } else { + return detail::compute_major_degrees( + handle, this->adj_matrix_partition_offsets_, this->partition_); + } +} + +template +rmm::device_uvector +graph_view_t>::compute_out_degrees(raft::handle_t const& handle) const +{ + if (store_transposed) { + return compute_minor_degrees(handle, *this); + } else { + return detail::compute_major_degrees( + handle, this->offsets_, this->get_number_of_local_vertices()); + } +} + +template +rmm::device_uvector +graph_view_t>:: + compute_in_weight_sums(raft::handle_t const& handle) const +{ + if (store_transposed) { + return compute_weight_sums(handle, *this); + } else { + return compute_weight_sums(handle, *this); + } +} + +template +rmm::device_uvector graph_view_t< + vertex_t, + 
edge_t, + weight_t, + store_transposed, + multi_gpu, + std::enable_if_t>::compute_in_weight_sums(raft::handle_t const& handle) const +{ + if (store_transposed) { + return compute_weight_sums(handle, *this); + } else { + return compute_weight_sums(handle, *this); + } +} + +template +rmm::device_uvector +graph_view_t>:: + compute_out_weight_sums(raft::handle_t const& handle) const +{ + if (store_transposed) { + return compute_weight_sums(handle, *this); + } else { + return compute_weight_sums(handle, *this); + } +} + +template +rmm::device_uvector graph_view_t< + vertex_t, + edge_t, + weight_t, + store_transposed, + multi_gpu, + std::enable_if_t>::compute_out_weight_sums(raft::handle_t const& handle) const +{ + if (store_transposed) { + return compute_weight_sums(handle, *this); + } else { + return compute_weight_sums(handle, *this); + } +} + // explicit instantiation template class graph_view_t; diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index 058cbfe5966..c498d2864b4 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -142,23 +142,9 @@ void pagerank(raft::handle_t const& handle, // 2. compute the sums of the out-going edge weights (if not provided) - rmm::device_uvector tmp_vertex_out_weight_sums(0, handle.get_stream()); - if (precomputed_vertex_out_weight_sums == nullptr) { - tmp_vertex_out_weight_sums.resize(pull_graph_view.get_number_of_local_vertices(), - handle.get_stream()); - // FIXME: better refactor this out (computing out-degree). - copy_v_transform_reduce_out_nbr( - handle, - pull_graph_view, - thrust::make_constant_iterator(0) /* dummy */, - thrust::make_constant_iterator(0) /* dummy */, - [alpha] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) { - return w; - }, - weight_t{0.0}, - tmp_vertex_out_weight_sums.data()); - } - + auto tmp_vertex_out_weight_sums = precomputed_vertex_out_weight_sums == nullptr + ? pull_graph_view.compute_out_weight_sums(handle) + : rmm::device_uvector(0, handle.get_stream()); auto vertex_out_weight_sums = precomputed_vertex_out_weight_sums != nullptr ? 
precomputed_vertex_out_weight_sums : tmp_vertex_out_weight_sums.data(); diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index 6a5a1c732c2..b093a9adb22 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -224,7 +224,7 @@ rmm::device_uvector compute_renumber_map( labels.begin(), thrust::greater()); - return std::move(labels); + return labels; } template @@ -609,7 +609,7 @@ std::enable_if_t> renumber_edgelist( renumber_map.find( edgelist_minor_vertices, edgelist_minor_vertices + num_edgelist_edges, edgelist_minor_vertices); - return std::move(renumber_map_labels); + return renumber_map_labels; #else return rmm::device_uvector(0, handle.get_stream()); #endif diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 5425c68e896..68b277871b1 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -331,6 +331,26 @@ set(EXPERIMENTAL_GRAPH_TEST_SRCS ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}") +################################################################################################### +# - Experimental weight-sum tests ----------------------------------------------------------------- + +set(EXPERIMENTAL_WEIGHT_SUM_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/weight_sum_test.cpp") + +ConfigureTest(EXPERIMENTAL_WEIGHT_SUM_TEST "${EXPERIMENTAL_WEIGHT_SUM_TEST_SRCS}") + +################################################################################################### +# - Experimental degree tests --------------------------------------------------------------------- + +set(EXPERIMENTAL_DEGREE_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/degree_test.cpp") + +ConfigureTest(EXPERIMENTAL_DEGREE_TEST "${EXPERIMENTAL_DEGREE_TEST_SRCS}") + ################################################################################################### # - Experimental coarsening tests ----------------------------------------------------------------- diff --git a/cpp/tests/experimental/degree_test.cpp b/cpp/tests/experimental/degree_test.cpp new file mode 100644 index 00000000000..7c7b41cdacc --- /dev/null +++ b/cpp/tests/experimental/degree_test.cpp @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +template +void degree_reference(edge_t const* offsets, + vertex_t const* indices, + edge_t* degrees, + vertex_t num_vertices, + bool major) +{ + if (major) { + std::adjacent_difference(offsets + 1, offsets + num_vertices + 1, degrees); + } else { + std::fill(degrees, degrees + num_vertices, edge_t{0}); + for (vertex_t i = 0; i < num_vertices; ++i) { + for (auto j = offsets[i]; j < offsets[i + 1]; ++j) { + auto nbr = indices[j]; + ++degrees[nbr]; + } + } + } + + return; +} + +typedef struct Degree_Usecase_t { + std::string graph_file_full_path{}; + + Degree_Usecase_t(std::string const& graph_file_path) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} Degree_Usecase; + +class Tests_Degree : public ::testing::TestWithParam { + public: + Tests_Degree() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(Degree_Usecase const& configuration) + { + raft::handle_t handle{}; + + auto graph = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, false); + auto graph_view = graph.view(); + + std::vector h_offsets(graph_view.get_number_of_vertices() + 1); + std::vector h_indices(graph_view.get_number_of_edges()); + raft::update_host(h_offsets.data(), + graph_view.offsets(), + graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + graph_view.indices(), + graph_view.get_number_of_edges(), + handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + std::vector h_reference_in_degrees(graph_view.get_number_of_vertices()); + std::vector h_reference_out_degrees(graph_view.get_number_of_vertices()); + + degree_reference(h_offsets.data(), + h_indices.data(), + h_reference_in_degrees.data(), + graph_view.get_number_of_vertices(), + store_transposed); + + degree_reference(h_offsets.data(), + h_indices.data(), + h_reference_out_degrees.data(), + graph_view.get_number_of_vertices(), + !store_transposed); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + auto d_in_degrees = graph_view.compute_in_degrees(handle); + auto d_out_degrees = graph_view.compute_out_degrees(handle); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::vector h_cugraph_in_degrees(graph_view.get_number_of_vertices()); + std::vector h_cugraph_out_degrees(graph_view.get_number_of_vertices()); + + raft::update_host( + h_cugraph_in_degrees.data(), d_in_degrees.data(), d_in_degrees.size(), handle.get_stream()); + raft::update_host(h_cugraph_out_degrees.data(), + d_out_degrees.data(), + d_out_degrees.size(), + handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + ASSERT_TRUE(std::equal( + h_reference_in_degrees.begin(), h_reference_in_degrees.end(), h_cugraph_in_degrees.begin())) + << "In-degree values do not match with the reference values."; + ASSERT_TRUE(std::equal(h_reference_out_degrees.begin(), + h_reference_out_degrees.end(), + h_cugraph_out_degrees.begin())) + << "Out-degree values do not match with the reference values."; + } +}; + +// FIXME: add tests for type 
combinations + +TEST_P(Tests_Degree, CheckInt32Int32FloatTransposed) +{ + run_current_test(GetParam()); +} + +TEST_P(Tests_Degree, CheckInt32Int32FloatUntransposed) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P(simple_test, + Tests_Degree, + ::testing::Values(Degree_Usecase("test/datasets/karate.mtx"), + Degree_Usecase("test/datasets/web-Google.mtx"), + Degree_Usecase("test/datasets/ljournal-2008.mtx"), + Degree_Usecase("test/datasets/webbase-1M.mtx"))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/weight_sum_test.cpp b/cpp/tests/experimental/weight_sum_test.cpp new file mode 100644 index 00000000000..aeda7386314 --- /dev/null +++ b/cpp/tests/experimental/weight_sum_test.cpp @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +template +void weight_sum_reference(edge_t const* offsets, + vertex_t const* indices, + weight_t const* weights, + weight_t* weight_sums, + vertex_t num_vertices, + bool major) +{ + if (!major) { std::fill(weight_sums, weight_sums + num_vertices, weight_t{0.0}); } + for (vertex_t i = 0; i < num_vertices; ++i) { + if (major) { + weight_sums[i] = + std::accumulate(weights + offsets[i], weights + offsets[i + 1], weight_t{0.0}); + } else { + for (auto j = offsets[i]; j < offsets[i + 1]; ++j) { + auto nbr = indices[j]; + weight_sums[nbr] += weights[j]; + } + } + } + + return; +} + +typedef struct WeightSum_Usecase_t { + std::string graph_file_full_path{}; + + WeightSum_Usecase_t(std::string const& graph_file_path) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} WeightSum_Usecase; + +class Tests_WeightSum : public ::testing::TestWithParam { + public: + Tests_WeightSum() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(WeightSum_Usecase const& configuration) + { + raft::handle_t handle{}; + + auto graph = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, true); + auto graph_view = graph.view(); + + std::vector h_offsets(graph_view.get_number_of_vertices() + 1); + std::vector h_indices(graph_view.get_number_of_edges()); + std::vector h_weights(graph_view.get_number_of_edges()); + raft::update_host(h_offsets.data(), + graph_view.offsets(), + graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + graph_view.indices(), + graph_view.get_number_of_edges(), + handle.get_stream()); + raft::update_host(h_weights.data(), + graph_view.weights(), + graph_view.get_number_of_edges(), + 
handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + std::vector h_reference_in_weight_sums(graph_view.get_number_of_vertices()); + std::vector h_reference_out_weight_sums(graph_view.get_number_of_vertices()); + + weight_sum_reference(h_offsets.data(), + h_indices.data(), + h_weights.data(), + h_reference_in_weight_sums.data(), + graph_view.get_number_of_vertices(), + store_transposed); + + weight_sum_reference(h_offsets.data(), + h_indices.data(), + h_weights.data(), + h_reference_out_weight_sums.data(), + graph_view.get_number_of_vertices(), + !store_transposed); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + auto d_in_weight_sums = graph_view.compute_in_weight_sums(handle); + auto d_out_weight_sums = graph_view.compute_out_weight_sums(handle); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::vector h_cugraph_in_weight_sums(graph_view.get_number_of_vertices()); + std::vector h_cugraph_out_weight_sums(graph_view.get_number_of_vertices()); + + raft::update_host(h_cugraph_in_weight_sums.data(), + d_in_weight_sums.data(), + d_in_weight_sums.size(), + handle.get_stream()); + raft::update_host(h_cugraph_out_weight_sums.data(), + d_out_weight_sums.data(), + d_out_weight_sums.size(), + handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + auto threshold_ratio = weight_t{1e-4}; + auto threshold_magnitude = std::numeric_limits::min(); + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + }; + + ASSERT_TRUE(std::equal(h_reference_in_weight_sums.begin(), + h_reference_in_weight_sums.end(), + h_cugraph_in_weight_sums.begin(), + nearly_equal)) + << "In-weight-sum values do not match with the reference values."; + ASSERT_TRUE(std::equal(h_reference_out_weight_sums.begin(), + h_reference_out_weight_sums.end(), + h_cugraph_out_weight_sums.begin(), + nearly_equal)) + << "Out-weight-sum values do not match with the reference values."; + } +}; + +// FIXME: add tests for type combinations + +TEST_P(Tests_WeightSum, CheckInt32Int32FloatTransposed) +{ + run_current_test(GetParam()); +} + +TEST_P(Tests_WeightSum, CheckInt32Int32FloatUntransposed) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P(simple_test, + Tests_WeightSum, + ::testing::Values(WeightSum_Usecase("test/datasets/karate.mtx"), + WeightSum_Usecase("test/datasets/web-Google.mtx"), + WeightSum_Usecase("test/datasets/ljournal-2008.mtx"), + WeightSum_Usecase("test/datasets/webbase-1M.mtx"))); + +CUGRAPH_TEST_PROGRAM_MAIN() From ca895946189ae6cb00daa5c5bde1e37cb78788e4 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Mon, 1 Mar 2021 09:58:55 -0500 Subject: [PATCH 07/51] Add boost 1.0 license file. (#1401) #1411 added code (to address #1329) that follows the BOOST 1.0 license and this PR adds the BOOST 1.0 license to cuGraph codebase. 
Authors: - Seunghwa Kang (@seunghwak) Approvers: - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1401 --- thirdparty/LICENSES/LICENSE.boost | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 thirdparty/LICENSES/LICENSE.boost diff --git a/thirdparty/LICENSES/LICENSE.boost b/thirdparty/LICENSES/LICENSE.boost new file mode 100644 index 00000000000..36b7cd93cdf --- /dev/null +++ b/thirdparty/LICENSES/LICENSE.boost @@ -0,0 +1,23 @@ +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
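A note for readers working through the test changes in this series: the degree and weight-sum tests above validate GPU results against host-side references computed directly from the CSR offsets/indices/weights arrays, and they accept small floating-point differences through a combined relative/absolute tolerance (the nearly_equal lambda). The standalone sketch below illustrates that pattern on a tiny CSR graph; it is illustrative only, not part of any patch in this series, and the toy arrays and the names out_weight_sums/in_weight_sums are invented for the example.

// Illustrative sketch only (not part of any patch): host-side weight sums over a toy CSR
// graph plus the relative/absolute tolerance check used by the tests in this series.
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdio>
#include <limits>
#include <numeric>
#include <vector>

int main()
{
  // Toy CSR graph (invented for this example): 3 vertices, 4 weighted edges
  // 0->1 (0.5), 0->2 (1.5), 1->2 (2.0), 2->0 (4.0).
  std::vector<int> offsets{0, 2, 3, 4};
  std::vector<int> indices{1, 2, 2, 0};
  std::vector<float> weights{0.5f, 1.5f, 2.0f, 4.0f};
  int num_vertices = static_cast<int>(offsets.size()) - 1;

  // Out-weight sums: accumulate each CSR row (the major == true branch of weight_sum_reference).
  std::vector<float> out_weight_sums(num_vertices, 0.0f);
  for (int i = 0; i < num_vertices; ++i) {
    out_weight_sums[i] =
      std::accumulate(weights.begin() + offsets[i], weights.begin() + offsets[i + 1], 0.0f);
  }

  // In-weight sums: scatter-add every edge weight to its destination (the major == false branch).
  std::vector<float> in_weight_sums(num_vertices, 0.0f);
  for (int i = 0; i < num_vertices; ++i) {
    for (int j = offsets[i]; j < offsets[i + 1]; ++j) { in_weight_sums[indices[j]] += weights[j]; }
  }

  // Combined relative/absolute tolerance, mirroring the tests' nearly_equal lambda.
  float threshold_ratio     = 1e-4f;
  float threshold_magnitude = std::numeric_limits<float>::min();
  auto nearly_equal         = [=](float lhs, float rhs) {
    return std::abs(lhs - rhs) < std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude);
  };

  assert(nearly_equal(out_weight_sums[0], 2.0f));  // 0.5 + 1.5
  assert(nearly_equal(in_weight_sums[2], 3.5f));   // 1.5 + 2.0
  std::printf("out[0] = %f, in[2] = %f\n", out_weight_sums[0], in_weight_sums[2]);
  return 0;
}

The same relative-plus-absolute threshold shape reappears in mg_pagerank_test.cpp further down in this series, where the magnitude floor is scaled by 1/(number of vertices) so that lowly ranked vertices are effectively excluded from the comparison.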
From 0adc558ea57ee4d07957dda589770d73d5d514cb Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Mon, 1 Mar 2021 14:02:33 -0500 Subject: [PATCH 08/51] Update C++ MG PageRank test (#1419) - [x] Add tests using graphs with isolated vertices - [x] Add personalized PageRank tests - [x] Test code refactoring - [x] Create libcugraphtestutil.a This PR fixes FIXMEs added in https://github.com/rapidsai/cugraph/pull/1361 to address https://github.com/rapidsai/cugraph/issues/1136 Authors: - Seunghwa Kang (@seunghwak) Approvers: - Rick Ratzel (@rlratzel) - Andrei Schaffer (@aschaffer) - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1419 --- cpp/include/algorithms.hpp | 8 +- cpp/include/experimental/graph.hpp | 8 + cpp/include/experimental/graph_view.hpp | 4 + cpp/src/experimental/katz_centrality.cu | 28 +- cpp/src/experimental/pagerank.cu | 84 +- cpp/src/experimental/renumber_edgelist.cu | 9 +- cpp/tests/CMakeLists.txt | 102 +-- cpp/tests/community/egonet_test.cu | 8 +- cpp/tests/experimental/bfs_test.cpp | 9 +- cpp/tests/experimental/coarsen_graph_test.cpp | 8 +- cpp/tests/experimental/graph_test.cpp | 60 +- .../experimental/induced_subgraph_test.cpp | 8 +- .../experimental/katz_centrality_test.cpp | 7 +- cpp/tests/experimental/louvain_test.cu | 7 +- cpp/tests/experimental/pagerank_test.cpp | 11 +- cpp/tests/experimental/sssp_test.cpp | 9 +- cpp/tests/pagerank/mg_pagerank_test.cpp | 296 +++++++ cpp/tests/pagerank/pagerank_mg_test.cpp | 229 ----- cpp/tests/utilities/base_fixture.hpp | 71 +- cpp/tests/utilities/mg_test_utilities.cu | 180 ---- cpp/tests/utilities/mg_test_utilities.hpp | 77 -- cpp/tests/utilities/test_utilities.cpp | 442 ---------- cpp/tests/utilities/test_utilities.cu | 788 ++++++++++++++++++ cpp/tests/utilities/test_utilities.hpp | 41 +- 24 files changed, 1337 insertions(+), 1157 deletions(-) create mode 100644 cpp/tests/pagerank/mg_pagerank_test.cpp delete mode 100644 cpp/tests/pagerank/pagerank_mg_test.cpp delete mode 100644 cpp/tests/utilities/mg_test_utilities.cu delete mode 100644 cpp/tests/utilities/mg_test_utilities.hpp delete mode 100644 cpp/tests/utilities/test_utilities.cpp create mode 100644 cpp/tests/utilities/test_utilities.cu diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index c666bce23ad..0b8bd59587f 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -1100,9 +1100,9 @@ void sssp(raft::handle_t const &handle, template void pagerank(raft::handle_t const &handle, graph_view_t const &graph_view, - weight_t *adj_matrix_row_out_weight_sums, - vertex_t *personalization_vertices, - result_t *personalization_values, + weight_t const *adj_matrix_row_out_weight_sums, + vertex_t const *personalization_vertices, + result_t const *personalization_values, vertex_t personalization_vector_size, result_t *pageranks, result_t alpha, @@ -1148,7 +1148,7 @@ void pagerank(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - result_t *betas, + result_t const *betas, result_t *katz_centralities, result_t alpha, result_t beta, diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/experimental/graph.hpp index cc21f7c5013..6a10256e6f4 100644 --- a/cpp/include/experimental/graph.hpp +++ b/cpp/include/experimental/graph.hpp @@ -61,6 +61,8 @@ class graph_t() {} + graph_t(raft::handle_t const &handle, std::vector> const &edgelists, partition_t const &partition, @@ -123,6 +125,12 @@ class graph_t(), + 
offsets_(0, handle.get_stream()), + indices_(0, handle.get_stream()), + weights_(0, handle.get_stream()){}; + graph_t(raft::handle_t const &handle, edgelist_t const &edgelist, vertex_t number_of_vertices, diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index 7598841fc1a..5d3d09bb087 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -82,6 +82,8 @@ namespace experimental { template class partition_t { public: + partition_t() = default; + partition_t(std::vector const& vertex_partition_offsets, bool hypergraph_partitioned, int row_comm_size, @@ -247,6 +249,8 @@ size_t constexpr num_segments_per_vertex_partition{3}; template class graph_base_t { public: + graph_base_t() = default; + graph_base_t(raft::handle_t const& handle, vertex_t number_of_vertices, edge_t number_of_edges, diff --git a/cpp/src/experimental/katz_centrality.cu b/cpp/src/experimental/katz_centrality.cu index 1ab824f1c91..7ffef5053af 100644 --- a/cpp/src/experimental/katz_centrality.cu +++ b/cpp/src/experimental/katz_centrality.cu @@ -38,7 +38,7 @@ namespace detail { template void katz_centrality(raft::handle_t const &handle, GraphViewType const &pull_graph_view, - result_t *betas, + result_t const *betas, result_t *katz_centralities, result_t alpha, result_t beta, // relevant only if betas == nullptr @@ -173,7 +173,7 @@ void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - result_t *betas, + result_t const *betas, result_t *katz_centralities, result_t alpha, result_t beta, // relevant only if beta == nullptr @@ -200,7 +200,7 @@ void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - float *betas, + float const *betas, float *katz_centralities, float alpha, float beta, @@ -212,7 +212,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - double *betas, + double const *betas, double *katz_centralities, double alpha, double beta, @@ -224,7 +224,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - float *betas, + float const *betas, float *katz_centralities, float alpha, float beta, @@ -236,7 +236,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - double *betas, + double const *betas, double *katz_centralities, double alpha, double beta, @@ -248,7 +248,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - float *betas, + float const *betas, float *katz_centralities, float alpha, float beta, @@ -260,7 +260,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - double *betas, + double const *betas, double *katz_centralities, double alpha, double beta, @@ -272,7 +272,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - float *betas, + float const *betas, float *katz_centralities, float alpha, float beta, @@ -284,7 +284,7 @@ template void 
katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - double *betas, + double const *betas, double *katz_centralities, double alpha, double beta, @@ -296,7 +296,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - float *betas, + float const *betas, float *katz_centralities, float alpha, float beta, @@ -308,7 +308,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - double *betas, + double const *betas, double *katz_centralities, double alpha, double beta, @@ -320,7 +320,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - float *betas, + float const *betas, float *katz_centralities, float alpha, float beta, @@ -332,7 +332,7 @@ template void katz_centrality(raft::handle_t const &handle, template void katz_centrality(raft::handle_t const &handle, graph_view_t const &graph_view, - double *betas, + double const *betas, double *katz_centralities, double alpha, double beta, diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index c498d2864b4..e5874acb04f 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -44,9 +44,9 @@ namespace detail { template void pagerank(raft::handle_t const& handle, GraphViewType const& pull_graph_view, - typename GraphViewType::weight_type* precomputed_vertex_out_weight_sums, - typename GraphViewType::vertex_type* personalization_vertices, - result_t* personalization_values, + typename GraphViewType::weight_type const* precomputed_vertex_out_weight_sums, + typename GraphViewType::vertex_type const* personalization_vertices, + result_t const* personalization_values, typename GraphViewType::vertex_type personalization_vector_size, result_t* pageranks, result_t alpha, @@ -279,9 +279,9 @@ void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - weight_t* precomputed_vertex_out_weight_sums, - vertex_t* personalization_vertices, - result_t* personalization_values, + weight_t const* precomputed_vertex_out_weight_sums, + vertex_t const* personalization_vertices, + result_t const* personalization_values, vertex_t personalization_vector_size, result_t* pageranks, result_t alpha, @@ -308,9 +308,9 @@ void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float* precomputed_vertex_out_weight_sums, - int32_t* personalization_vertices, - float* personalization_values, + float const* precomputed_vertex_out_weight_sums, + int32_t const* personalization_vertices, + float const* personalization_values, int32_t personalization_vector_size, float* pageranks, float alpha, @@ -321,9 +321,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* precomputed_vertex_out_weight_sums, - int32_t* personalization_vertices, - double* personalization_values, + double const* precomputed_vertex_out_weight_sums, + int32_t const* personalization_vertices, + double const* personalization_values, int32_t personalization_vector_size, double* pageranks, double alpha, @@ -334,9 +334,9 @@ template void 
pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float* precomputed_vertex_out_weight_sums, - int32_t* personalization_vertices, - float* personalization_values, + float const* precomputed_vertex_out_weight_sums, + int32_t const* personalization_vertices, + float const* personalization_values, int32_t personalization_vector_size, float* pageranks, float alpha, @@ -347,9 +347,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* precomputed_vertex_out_weight_sums, - int32_t* personalization_vertices, - double* personalization_values, + double const* precomputed_vertex_out_weight_sums, + int32_t const* personalization_vertices, + double const* personalization_values, int32_t personalization_vector_size, double* pageranks, double alpha, @@ -360,9 +360,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float* precomputed_vertex_out_weight_sums, - int64_t* personalization_vertices, - float* personalization_values, + float const* precomputed_vertex_out_weight_sums, + int64_t const* personalization_vertices, + float const* personalization_values, int64_t personalization_vector_size, float* pageranks, float alpha, @@ -373,9 +373,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* precomputed_vertex_out_weight_sums, - int64_t* personalization_vertices, - double* personalization_values, + double const* precomputed_vertex_out_weight_sums, + int64_t const* personalization_vertices, + double const* personalization_values, int64_t personalization_vector_size, double* pageranks, double alpha, @@ -386,9 +386,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float* precomputed_vertex_out_weight_sums, - int32_t* personalization_vertices, - float* personalization_values, + float const* precomputed_vertex_out_weight_sums, + int32_t const* personalization_vertices, + float const* personalization_values, int32_t personalization_vector_size, float* pageranks, float alpha, @@ -399,9 +399,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* precomputed_vertex_out_weight_sums, - int32_t* personalization_vertices, - double* personalization_values, + double const* precomputed_vertex_out_weight_sums, + int32_t const* personalization_vertices, + double const* personalization_values, int32_t personalization_vector_size, double* pageranks, double alpha, @@ -412,9 +412,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float* precomputed_vertex_out_weight_sums, - int32_t* personalization_vertices, - float* personalization_values, + float const* precomputed_vertex_out_weight_sums, + int32_t const* personalization_vertices, + float const* personalization_values, int32_t personalization_vector_size, float* pageranks, float alpha, @@ -425,9 +425,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* precomputed_vertex_out_weight_sums, - int32_t* personalization_vertices, - double* 
personalization_values, + double const* precomputed_vertex_out_weight_sums, + int32_t const* personalization_vertices, + double const* personalization_values, int32_t personalization_vector_size, double* pageranks, double alpha, @@ -438,9 +438,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float* precomputed_vertex_out_weight_sums, - int64_t* personalization_vertices, - float* personalization_values, + float const* precomputed_vertex_out_weight_sums, + int64_t const* personalization_vertices, + float const* personalization_values, int64_t personalization_vector_size, float* pageranks, float alpha, @@ -451,9 +451,9 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* precomputed_vertex_out_weight_sums, - int64_t* personalization_vertices, - double* personalization_values, + double const* precomputed_vertex_out_weight_sums, + int64_t const* personalization_vertices, + double const* personalization_values, int64_t personalization_vector_size, double* pageranks, double alpha, diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index b093a9adb22..a8847167b87 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -547,11 +547,10 @@ renumber_edgelist(raft::handle_t const& handle, return std::make_tuple( std::move(renumber_map_labels), partition, number_of_vertices, number_of_edges); #else - return std::make_tuple( - rmm::device_uvector(0, handle.get_stream()), - partition_t(std::vector(), false, int{0}, int{0}, int{0}, int{0}), - vertex_t{0}, - edge_t{0}); + return std::make_tuple(rmm::device_uvector(0, handle.get_stream()), + partition_t{}, + vertex_t{0}, + edge_t{0}); #endif } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 68b277871b1..a93aa0cfabb 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -16,6 +16,43 @@ # #============================================================================= +################################################################################################### +# - common test utils ----------------------------------------------------------------------------- + +add_library(cugraphtestutil STATIC + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c") + +set_property(TARGET cugraphtestutil PROPERTY POSITION_INDEPENDENT_CODE ON) + +target_include_directories(cugraphtestutil + PRIVATE + "${CUB_INCLUDE_DIR}" + "${THRUST_INCLUDE_DIR}" + "${CUCO_INCLUDE_DIR}" + "${LIBCUDACXX_INCLUDE_DIR}" + "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" + "${RMM_INCLUDE}" + "${NCCL_INCLUDE_DIRS}" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio" + "${CMAKE_CURRENT_SOURCE_DIR}/../include" + "${CMAKE_CURRENT_SOURCE_DIR}" + "${RAFT_DIR}/cpp/include" +) + +target_link_libraries(cugraphtestutil cugraph) + +# CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the +# compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent +# cmake warnings about policy CMP0104. With this setting, arch flags must be +# manually set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism +# used in cpp/CMakeLists.txt for setting arch options). +# Run "cmake --help-policy CMP0104" for policy details. 
+# NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to +# the findcudatoolkit features in cmake 3.17+ +set_target_properties(cugraphtestutil PROPERTIES + CUDA_ARCHITECTURES OFF) + ################################################################################################### # - compiler function ----------------------------------------------------------------------------- @@ -31,8 +68,6 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC) "${LIBCUDACXX_INCLUDE_DIR}" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" "${RMM_INCLUDE}" - "${CUDF_INCLUDE}" - "${CUDF_INCLUDE}/libcudf/libcudacxx" "${NCCL_INCLUDE_DIRS}" "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio" "${CMAKE_CURRENT_SOURCE_DIR}/../include" @@ -49,6 +84,7 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE + cugraphtestutil cugraph GTest::GTest GTest::Main @@ -140,16 +176,10 @@ endif(RAPIDS_DATASET_ROOT_DIR) ### test sources ################################################################################## ################################################################################################### -# FIXME: consider adding a "add_library(cugraph_testing SHARED ...) instead of -# adding the same test utility sources to each test target. There may need to be -# an additional cugraph_mg_testing lib due to the optional inclusion of MPI. - ################################################################################################### # - katz centrality tests ------------------------------------------------------------------------- set(KATZ_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/centrality/katz_centrality_test.cu") ConfigureTest(KATZ_TEST "${KATZ_TEST_SRC}") @@ -158,15 +188,11 @@ set(KATZ_TEST_SRC # - betweenness centrality tests ------------------------------------------------------------------ set(BETWEENNESS_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/centrality/betweenness_centrality_test.cu") ConfigureTest(BETWEENNESS_TEST "${BETWEENNESS_TEST_SRC}") set(EDGE_BETWEENNESS_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/centrality/edge_betweenness_centrality_test.cu") ConfigureTest(EDGE_BETWEENNESS_TEST "${EDGE_BETWEENNESS_TEST_SRC}") @@ -175,8 +201,6 @@ set(EDGE_BETWEENNESS_TEST_SRC # - SSSP tests ------------------------------------------------------------------------------------ set(SSSP_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/traversal/sssp_test.cu") ConfigureTest(SSSP_TEST "${SSSP_TEST_SRCS}") @@ -185,8 +209,6 @@ ConfigureTest(SSSP_TEST "${SSSP_TEST_SRCS}") # - BFS tests ------------------------------------------------------------------------------------- set(BFS_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/traversal/bfs_test.cu") ConfigureTest(BFS_TEST "${BFS_TEST_SRCS}") @@ -195,8 +217,6 @@ ConfigureTest(BFS_TEST "${BFS_TEST_SRCS}") # - LOUVAIN tests --------------------------------------------------------------------------------- set(LOUVAIN_TEST_SRC - 
"${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/louvain_test.cpp") ConfigureTest(LOUVAIN_TEST "${LOUVAIN_TEST_SRC}") @@ -205,8 +225,6 @@ ConfigureTest(LOUVAIN_TEST "${LOUVAIN_TEST_SRC}") # - LEIDEN tests --------------------------------------------------------------------------------- set(LEIDEN_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/leiden_test.cpp") ConfigureTest(LEIDEN_TEST "${LEIDEN_TEST_SRC}") @@ -215,8 +233,6 @@ ConfigureTest(LEIDEN_TEST "${LEIDEN_TEST_SRC}") # - ECG tests --------------------------------------------------------------------------------- set(ECG_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/ecg_test.cpp") ConfigureTest(ECG_TEST "${ECG_TEST_SRC}") @@ -225,8 +241,6 @@ ConfigureTest(ECG_TEST "${ECG_TEST_SRC}") # - Balanced cut clustering tests ----------------------------------------------------------------- set(BALANCED_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/balanced_edge_test.cpp") ConfigureTest(BALANCED_TEST "${BALANCED_TEST_SRC}") @@ -235,8 +249,6 @@ ConfigureTest(BALANCED_TEST "${BALANCED_TEST_SRC}") # - TRIANGLE tests -------------------------------------------------------------------------------- set(TRIANGLE_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/triangle_test.cu") ConfigureTest(TRIANGLE_TEST "${TRIANGLE_TEST_SRC}") @@ -245,8 +257,6 @@ ConfigureTest(TRIANGLE_TEST "${TRIANGLE_TEST_SRC}") # - EGO tests -------------------------------------------------------------------------------- set(EGO_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/egonet_test.cu") ConfigureTest(EGO_TEST "${EGO_TEST_SRC}" "") @@ -254,8 +264,6 @@ ConfigureTest(EGO_TEST "${EGO_TEST_SRC}" "") # - RENUMBERING tests ----------------------------------------------------------------------------- set(RENUMBERING_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/renumber/renumber_test.cu") ConfigureTest(RENUMBERING_TEST "${RENUMBERING_TEST_SRC}") @@ -264,8 +272,6 @@ ConfigureTest(RENUMBERING_TEST "${RENUMBERING_TEST_SRC}") # - FORCE ATLAS 2 tests -------------------------------------------------------------------------- set(FA2_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/layout/force_atlas2_test.cu") ConfigureTest(FA2_TEST "${FA2_TEST_SRC}") @@ -274,8 +280,6 @@ ConfigureTest(FA2_TEST "${FA2_TEST_SRC}") # - TSP tests -------------------------------------------------------------------------- set(TSP_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/traversal/tsp_test.cu") ConfigureTest(TSP_TEST "${TSP_TEST_SRC}" "") @@ -284,8 +288,6 @@ 
set(TSP_TEST_SRC # - CONNECTED COMPONENTS tests ------------------------------------------------------------------- set(CONNECT_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/components/con_comp_test.cu") ConfigureTest(CONNECT_TEST "${CONNECT_TEST_SRC}") @@ -294,8 +296,6 @@ ConfigureTest(CONNECT_TEST "${CONNECT_TEST_SRC}") # - STRONGLY CONNECTED COMPONENTS tests ---------------------------------------------------------- set(SCC_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/components/scc_test.cu") ConfigureTest(SCC_TEST "${SCC_TEST_SRC}") @@ -304,8 +304,6 @@ ConfigureTest(SCC_TEST "${SCC_TEST_SRC}") #-Hungarian (Linear Assignment Problem) tests --------------------------------------------------------------------- set(HUNGARIAN_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/linear_assignment/hungarian_test.cu") ConfigureTest(HUNGARIAN_TEST "${HUNGARIAN_TEST_SRC}") @@ -314,8 +312,6 @@ ConfigureTest(HUNGARIAN_TEST "${HUNGARIAN_TEST_SRC}") # - MST tests ---------------------------------------------------------------------------- set(MST_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/tree/mst_test.cu") ConfigureTest(MST_TEST "${MST_TEST_SRC}") @@ -325,8 +321,6 @@ ConfigureTest(MST_TEST "${MST_TEST_SRC}") # - Experimental Graph tests ---------------------------------------------------------------------- set(EXPERIMENTAL_GRAPH_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/graph_test.cpp") ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}") @@ -355,8 +349,6 @@ ConfigureTest(EXPERIMENTAL_DEGREE_TEST "${EXPERIMENTAL_DEGREE_TEST_SRCS}") # - Experimental coarsening tests ----------------------------------------------------------------- set(EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/coarsen_graph_test.cpp") ConfigureTest(EXPERIMENTAL_COARSEN_GRAPH_TEST "${EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS}") @@ -365,8 +357,6 @@ ConfigureTest(EXPERIMENTAL_COARSEN_GRAPH_TEST "${EXPERIMENTAL_COARSEN_GRAPH_TEST # - Experimental induced subgraph tests ----------------------------------------------------------- set(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/induced_subgraph_test.cpp") ConfigureTest(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST "${EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS}") @@ -375,8 +365,6 @@ ConfigureTest(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST "${EXPERIMENTAL_INDUCED_SUBGRAP # - Experimental BFS tests ------------------------------------------------------------------------ set(EXPERIMENTAL_BFS_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/bfs_test.cpp") ConfigureTest(EXPERIMENTAL_BFS_TEST 
"${EXPERIMENTAL_BFS_TEST_SRCS}") @@ -385,8 +373,6 @@ ConfigureTest(EXPERIMENTAL_BFS_TEST "${EXPERIMENTAL_BFS_TEST_SRCS}") # - Experimental SSSP tests ----------------------------------------------------------------------- set(EXPERIMENTAL_SSSP_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/sssp_test.cpp") ConfigureTest(EXPERIMENTAL_SSSP_TEST "${EXPERIMENTAL_SSSP_TEST_SRCS}") @@ -395,8 +381,6 @@ ConfigureTest(EXPERIMENTAL_SSSP_TEST "${EXPERIMENTAL_SSSP_TEST_SRCS}") # - Experimental PAGERANK tests ------------------------------------------------------------------- set(EXPERIMENTAL_PAGERANK_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/pagerank_test.cpp") ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}") @@ -405,8 +389,6 @@ ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}") # - Experimental LOUVAIN tests ------------------------------------------------------------------- set(EXPERIMENTAL_LOUVAIN_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/louvain_test.cu") ConfigureTest(EXPERIMENTAL_LOUVAIN_TEST "${EXPERIMENTAL_LOUVAIN_TEST_SRCS}") @@ -415,8 +397,6 @@ ConfigureTest(EXPERIMENTAL_LOUVAIN_TEST "${EXPERIMENTAL_LOUVAIN_TEST_SRCS}") # - Experimental KATZ_CENTRALITY tests ------------------------------------------------------------ set(EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/katz_centrality_test.cpp") ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS}") @@ -424,16 +404,14 @@ ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_ ################################################################################################### # - MG tests -------------------------------------------------------------------------------------- + if(BUILD_CUGRAPH_MG_TESTS) if(MPI_CXX_FOUND) ########################################################################################### # - MG PAGERANK tests --------------------------------------------------------------------- set(MG_PAGERANK_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/mg_test_utilities.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/pagerank/pagerank_mg_test.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/pagerank/mg_pagerank_test.cpp") ConfigureTest(MG_PAGERANK_TEST "${MG_PAGERANK_TEST_SRCS}") target_link_libraries(MG_PAGERANK_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) diff --git a/cpp/tests/community/egonet_test.cu b/cpp/tests/community/egonet_test.cu index ec031228998..ef2699bd1d0 100644 --- a/cpp/tests/community/egonet_test.cu +++ b/cpp/tests/community/egonet_test.cu @@ -69,9 +69,11 @@ class Tests_InducedEgo : public ::testing::TestWithParam { { raft::handle_t handle{}; - auto graph = cugraph::test:: - read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted); + cugraph::experimental::graph_t graph( + handle); + std::tie(graph, std::ignore) = 
cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, false); auto graph_view = graph.view(); rmm::device_uvector d_ego_sources(configuration.ego_sources.size(), diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp index 82286b1e2fa..5b8add98560 100644 --- a/cpp/tests/experimental/bfs_test.cpp +++ b/cpp/tests/experimental/bfs_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -102,9 +102,10 @@ class Tests_BFS : public ::testing::TestWithParam { raft::handle_t handle{}; - auto graph = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, false); + cugraph::experimental::graph_t graph(handle); + std::tie(graph, std::ignore) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, false, false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); diff --git a/cpp/tests/experimental/coarsen_graph_test.cpp b/cpp/tests/experimental/coarsen_graph_test.cpp index b790dfffa69..941b33e5661 100644 --- a/cpp/tests/experimental/coarsen_graph_test.cpp +++ b/cpp/tests/experimental/coarsen_graph_test.cpp @@ -273,9 +273,11 @@ class Tests_CoarsenGraph : public ::testing::TestWithParam return; } - auto graph = cugraph::test:: - read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted); + cugraph::experimental::graph_t graph( + handle); + std::tie(graph, std::ignore) = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, false); auto graph_view = graph.view(); if (graph_view.get_number_of_vertices() == 0) { return; } diff --git a/cpp/tests/experimental/graph_test.cpp b/cpp/tests/experimental/graph_test.cpp index b80de68f95c..949f6d2e08e 100644 --- a/cpp/tests/experimental/graph_test.cpp +++ b/cpp/tests/experimental/graph_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -91,10 +91,28 @@ class Tests_Graph : public ::testing::TestWithParam { template void run_current_test(Graph_Usecase const& configuration) { - auto mm_graph = - cugraph::test::read_edgelist_from_matrix_market_file( - configuration.graph_file_full_path); - edge_t number_of_edges = static_cast(mm_graph.h_rows.size()); + raft::handle_t handle{}; + + rmm::device_uvector d_rows(0, handle.get_stream()); + rmm::device_uvector d_cols(0, handle.get_stream()); + rmm::device_uvector d_weights(0, handle.get_stream()); + vertex_t number_of_vertices{}; + bool is_symmetric{}; + std::tie(d_rows, d_cols, d_weights, number_of_vertices, is_symmetric) = + cugraph::test::read_edgelist_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted); + edge_t number_of_edges = static_cast(d_rows.size()); + + std::vector h_rows(number_of_edges); + std::vector h_cols(number_of_edges); + std::vector h_weights(configuration.test_weighted ? 
number_of_edges : edge_t{0}); + + raft::update_host(h_rows.data(), d_rows.data(), number_of_edges, handle.get_stream()); + raft::update_host(h_cols.data(), d_cols.data(), number_of_edges, handle.get_stream()); + if (configuration.test_weighted) { + raft::update_host(h_weights.data(), d_weights.data(), number_of_edges, handle.get_stream()); + } + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); std::vector h_reference_offsets{}; std::vector h_reference_indices{}; @@ -102,28 +120,12 @@ class Tests_Graph : public ::testing::TestWithParam { std::tie(h_reference_offsets, h_reference_indices, h_reference_weights) = graph_reference( - mm_graph.h_rows.data(), - mm_graph.h_cols.data(), - configuration.test_weighted ? mm_graph.h_weights.data() : nullptr, - mm_graph.number_of_vertices, + h_rows.data(), + h_cols.data(), + configuration.test_weighted ? h_weights.data() : static_cast(nullptr), + number_of_vertices, number_of_edges); - raft::handle_t handle{}; - - rmm::device_uvector d_rows(number_of_edges, handle.get_stream()); - rmm::device_uvector d_cols(number_of_edges, handle.get_stream()); - rmm::device_uvector d_weights(configuration.test_weighted ? number_of_edges : 0, - handle.get_stream()); - - raft::update_device( - d_rows.data(), mm_graph.h_rows.data(), number_of_edges, handle.get_stream()); - raft::update_device( - d_cols.data(), mm_graph.h_cols.data(), number_of_edges, handle.get_stream()); - if (configuration.test_weighted) { - raft::update_device( - d_weights.data(), mm_graph.h_weights.data(), number_of_edges, handle.get_stream()); - } - cugraph::experimental::edgelist_t edgelist{ d_rows.data(), d_cols.data(), @@ -136,8 +138,8 @@ class Tests_Graph : public ::testing::TestWithParam { cugraph::experimental::graph_t( handle, edgelist, - mm_graph.number_of_vertices, - cugraph::experimental::graph_properties_t{mm_graph.is_symmetric, false}, + number_of_vertices, + cugraph::experimental::graph_properties_t{is_symmetric, false}, false, true); @@ -145,7 +147,7 @@ class Tests_Graph : public ::testing::TestWithParam { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - ASSERT_EQ(graph_view.get_number_of_vertices(), mm_graph.number_of_vertices); + ASSERT_EQ(graph_view.get_number_of_vertices(), number_of_vertices); ASSERT_EQ(graph_view.get_number_of_edges(), number_of_edges); std::vector h_cugraph_offsets(graph_view.get_number_of_vertices() + 1); @@ -174,7 +176,7 @@ class Tests_Graph : public ::testing::TestWithParam { std::equal(h_reference_offsets.begin(), h_reference_offsets.end(), h_cugraph_offsets.begin())) << "Graph compressed sparse format offsets do not match with the reference values."; ASSERT_EQ(h_reference_weights.size(), h_cugraph_weights.size()); - for (vertex_t i = 0; i < mm_graph.number_of_vertices; ++i) { + for (vertex_t i = 0; i < number_of_vertices; ++i) { auto start = h_reference_offsets[i]; auto degree = h_reference_offsets[i + 1] - start; if (configuration.test_weighted) { diff --git a/cpp/tests/experimental/induced_subgraph_test.cpp b/cpp/tests/experimental/induced_subgraph_test.cpp index 72894a9349f..4e0ca9e7d92 100644 --- a/cpp/tests/experimental/induced_subgraph_test.cpp +++ b/cpp/tests/experimental/induced_subgraph_test.cpp @@ -113,9 +113,11 @@ class Tests_InducedSubgraph : public ::testing::TestWithParam( - handle, configuration.graph_file_full_path, configuration.test_weighted); + cugraph::experimental::graph_t graph( + handle); + std::tie(graph, std::ignore) = cugraph::test:: + read_graph_from_matrix_market_file( + handle, 
configuration.graph_file_full_path, configuration.test_weighted, false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index 3e9f0b478a0..945248cc4de 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -117,9 +117,10 @@ class Tests_KatzCentrality : public ::testing::TestWithParam( - handle, configuration.graph_file_full_path, configuration.test_weighted); + cugraph::experimental::graph_t graph(handle); + std::tie(graph, std::ignore) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); diff --git a/cpp/tests/experimental/louvain_test.cu b/cpp/tests/experimental/louvain_test.cu index 35a26923df6..56fb2c109bf 100644 --- a/cpp/tests/experimental/louvain_test.cu +++ b/cpp/tests/experimental/louvain_test.cu @@ -69,9 +69,10 @@ class Tests_Louvain : public ::testing::TestWithParam { std::cout << "read graph file: " << configuration.graph_file_full_path << std::endl; - auto graph = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted); + cugraph::experimental::graph_t graph(handle); + std::tie(graph, std::ignore) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, false); auto graph_view = graph.view(); diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 53143bf0bf3..514f73e3311 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -155,9 +155,10 @@ class Tests_PageRank : public ::testing::TestWithParam { { raft::handle_t handle{}; - auto graph = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted); + cugraph::experimental::graph_t graph(handle); + std::tie(graph, std::ignore) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); @@ -225,11 +226,11 @@ class Tests_PageRank : public ::testing::TestWithParam { handle.get_stream()); } - std::vector h_reference_pageranks(graph_view.get_number_of_vertices()); - result_t constexpr alpha{0.85}; result_t constexpr epsilon{1e-6}; + std::vector h_reference_pageranks(graph_view.get_number_of_vertices()); + pagerank_reference(h_offsets.data(), h_indices.data(), h_weights.size() > 0 ? h_weights.data() : static_cast(nullptr), diff --git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp index 2f7cc499d35..7fd59d49a25 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -106,9 +106,10 @@ class Tests_SSSP : public ::testing::TestWithParam { { raft::handle_t handle{}; - auto graph = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, true); + cugraph::experimental::graph_t graph(handle); + std::tie(graph, std::ignore) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, true, false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); diff --git a/cpp/tests/pagerank/mg_pagerank_test.cpp b/cpp/tests/pagerank/mg_pagerank_test.cpp new file mode 100644 index 00000000000..cf9f452162b --- /dev/null +++ b/cpp/tests/pagerank/mg_pagerank_test.cpp @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include + +#include +#include +#include + +#include + +#include + +typedef struct Pagerank_Usecase_t { + std::string graph_file_full_path{}; + double personalization_ratio{0.0}; + bool test_weighted{false}; + + Pagerank_Usecase_t(std::string const& graph_file_path, + double personalization_ratio, + bool test_weighted) + : personalization_ratio(personalization_ratio), test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} Pagerank_Usecase; + +class Tests_MGPageRank : public ::testing::TestWithParam { + public: + Tests_MGPageRank() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of running pagerank on multiple GPUs to that of a single-GPU run + template + void run_current_test(Pagerank_Usecase const& configuration) + { + // 1. initialize handle + + raft::handle_t handle{}; + + raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + auto row_comm_size = static_cast(sqrt(static_cast(comm_size))); + while (comm_size % row_comm_size != 0) { --row_comm_size; } + cugraph::partition_2d::subcomm_factory_t + subcomm_factory(handle, row_comm_size); + + // 2. 
create SG & MG graphs + + cugraph::experimental::graph_t sg_graph(handle); + rmm::device_uvector d_sg_renumber_map_labels(0, handle.get_stream()); + std::tie(sg_graph, d_sg_renumber_map_labels) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, true); + + auto sg_graph_view = sg_graph.view(); + + cugraph::experimental::graph_t mg_graph(handle); + rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); + std::tie(mg_graph, d_mg_renumber_map_labels) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, true); + + auto mg_graph_view = mg_graph.view(); + + std::vector h_sg_renumber_map_labels(d_sg_renumber_map_labels.size()); + raft::update_host(h_sg_renumber_map_labels.data(), + d_sg_renumber_map_labels.data(), + d_sg_renumber_map_labels.size(), + handle.get_stream()); + + std::vector h_mg_renumber_map_labels(mg_graph_view.get_number_of_local_vertices()); + raft::update_host(h_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.size(), + handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + // 2. generate personalization vertex/value pairs + + std::vector h_personalization_vertices{}; + std::vector h_personalization_values{}; + if (configuration.personalization_ratio > 0.0) { + std::default_random_engine generator{}; + std::uniform_real_distribution distribution{0.0, 1.0}; + h_personalization_vertices.resize(sg_graph_view.get_number_of_vertices()); + std::iota(h_personalization_vertices.begin(), h_personalization_vertices.end(), vertex_t{0}); + h_personalization_vertices.erase( + std::remove_if(h_personalization_vertices.begin(), + h_personalization_vertices.end(), + [&generator, &distribution, configuration](auto v) { + return distribution(generator) >= configuration.personalization_ratio; + }), + h_personalization_vertices.end()); + h_personalization_values.resize(h_personalization_vertices.size()); + std::for_each(h_personalization_values.begin(), + h_personalization_values.end(), + [&distribution, &generator](auto& val) { val = distribution(generator); }); + } + + result_t constexpr alpha{0.85}; + result_t constexpr epsilon{1e-6}; + + // 3. 
run SG pagerank + + std::vector h_sg_personalization_vertices{}; + std::vector h_sg_personalization_values{}; + if (h_personalization_vertices.size() > 0) { + for (vertex_t i = 0; i < sg_graph_view.get_number_of_vertices(); ++i) { + auto it = std::lower_bound(h_personalization_vertices.begin(), + h_personalization_vertices.end(), + h_sg_renumber_map_labels[i]); + if (*it == h_sg_renumber_map_labels[i]) { + h_sg_personalization_vertices.push_back(i); + h_sg_personalization_values.push_back( + h_personalization_values[std::distance(h_personalization_vertices.begin(), it)]); + } + } + } + + rmm::device_uvector d_sg_personalization_vertices( + h_sg_personalization_vertices.size(), handle.get_stream()); + rmm::device_uvector d_sg_personalization_values(d_sg_personalization_vertices.size(), + handle.get_stream()); + if (d_sg_personalization_vertices.size() > 0) { + raft::update_device(d_sg_personalization_vertices.data(), + h_sg_personalization_vertices.data(), + h_sg_personalization_vertices.size(), + handle.get_stream()); + raft::update_device(d_sg_personalization_values.data(), + h_sg_personalization_values.data(), + h_sg_personalization_values.size(), + handle.get_stream()); + } + + rmm::device_uvector d_sg_pageranks(sg_graph_view.get_number_of_vertices(), + handle.get_stream()); + + cugraph::experimental::pagerank(handle, + sg_graph_view, + static_cast(nullptr), + d_sg_personalization_vertices.data(), + d_sg_personalization_values.data(), + static_cast(d_sg_personalization_vertices.size()), + d_sg_pageranks.begin(), + alpha, + epsilon, + std::numeric_limits::max(), // max_iterations + false, + false); + + std::vector h_sg_pageranks(sg_graph_view.get_number_of_vertices()); + raft::update_host( + h_sg_pageranks.data(), d_sg_pageranks.data(), d_sg_pageranks.size(), handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + // 4. 
run MG pagerank + + std::vector h_mg_personalization_vertices{}; + std::vector h_mg_personalization_values{}; + if (h_personalization_vertices.size() > 0) { + for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { + auto it = std::lower_bound(h_personalization_vertices.begin(), + h_personalization_vertices.end(), + h_mg_renumber_map_labels[i]); + if (*it == h_mg_renumber_map_labels[i]) { + h_mg_personalization_vertices.push_back(mg_graph_view.get_local_vertex_first() + i); + h_mg_personalization_values.push_back( + h_personalization_values[std::distance(h_personalization_vertices.begin(), it)]); + } + } + } + + rmm::device_uvector d_mg_personalization_vertices( + h_mg_personalization_vertices.size(), handle.get_stream()); + rmm::device_uvector d_mg_personalization_values(d_mg_personalization_vertices.size(), + handle.get_stream()); + if (d_mg_personalization_vertices.size() > 0) { + raft::update_device(d_mg_personalization_vertices.data(), + h_mg_personalization_vertices.data(), + h_mg_personalization_vertices.size(), + handle.get_stream()); + raft::update_device(d_mg_personalization_values.data(), + h_mg_personalization_values.data(), + h_mg_personalization_values.size(), + handle.get_stream()); + } + + rmm::device_uvector d_mg_pageranks(mg_graph_view.get_number_of_local_vertices(), + handle.get_stream()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + cugraph::experimental::pagerank(handle, + mg_graph_view, + static_cast(nullptr), + d_mg_personalization_vertices.data(), + d_mg_personalization_values.data(), + static_cast(d_mg_personalization_vertices.size()), + d_mg_pageranks.begin(), + alpha, + epsilon, + std::numeric_limits::max(), + false, + false); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::vector h_mg_pageranks(mg_graph_view.get_number_of_local_vertices()); + raft::update_host( + h_mg_pageranks.data(), d_mg_pageranks.data(), d_mg_pageranks.size(), handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + // 5. 
compare SG & MG results
+
+    std::vector h_sg_shuffled_pageranks(sg_graph_view.get_number_of_vertices(),
+                                                  result_t{0.0});
+    for (size_t i = 0; i < h_sg_pageranks.size(); ++i) {
+      h_sg_shuffled_pageranks[h_sg_renumber_map_labels[i]] = h_sg_pageranks[i];
+    }
+
+    auto threshold_ratio = 1e-3;
+    auto threshold_magnitude =
+      (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) *
+      threshold_ratio;  // skip comparison for low PageRank vertices (lowly ranked vertices)
+    auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) {
+      return std::abs(lhs - rhs) <
+             std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude);
+    };
+
+    for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) {
+      auto mapped_vertex = h_mg_renumber_map_labels[i];
+      ASSERT_TRUE(nearly_equal(h_mg_pageranks[i], h_sg_shuffled_pageranks[mapped_vertex]))
+        << "MG PageRank value for vertex: " << i << " in rank: " << comm_rank
+        << " has value: " << h_mg_pageranks[i]
+        << " which exceeds the error margin for comparing to SG value: "
+        << h_sg_shuffled_pageranks[mapped_vertex];
+    }
+  }
+};
+
+TEST_P(Tests_MGPageRank, CheckInt32Int32FloatFloat)
+{
+  run_current_test(GetParam());
+}
+
+INSTANTIATE_TEST_CASE_P(
+  simple_test,
+  Tests_MGPageRank,
+  ::testing::Values(Pagerank_Usecase("test/datasets/karate.mtx", 0.0, false),
+                    Pagerank_Usecase("test/datasets/karate.mtx", 0.5, false),
+                    Pagerank_Usecase("test/datasets/karate.mtx", 0.0, true),
+                    Pagerank_Usecase("test/datasets/karate.mtx", 0.5, true),
+                    Pagerank_Usecase("test/datasets/web-Google.mtx", 0.0, false),
+                    Pagerank_Usecase("test/datasets/web-Google.mtx", 0.5, false),
+                    Pagerank_Usecase("test/datasets/web-Google.mtx", 0.0, true),
+                    Pagerank_Usecase("test/datasets/web-Google.mtx", 0.5, true),
+                    Pagerank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, false),
+                    Pagerank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false),
+                    Pagerank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true),
+                    Pagerank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true),
+                    Pagerank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false),
+                    Pagerank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false),
+                    Pagerank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true),
+                    Pagerank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true)));
+
+CUGRAPH_MG_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/pagerank/pagerank_mg_test.cpp b/cpp/tests/pagerank/pagerank_mg_test.cpp
deleted file mode 100644
index 7f789226bf1..00000000000
--- a/cpp/tests/pagerank/pagerank_mg_test.cpp
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Copyright (c) 2021, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include
-#include
-#include
-
-#include
-#include
-
-#include
-
-#include
-
-////////////////////////////////////////////////////////////////////////////////
-// Test param object. 
This defines the input and expected output for a test, and -// will be instantiated as the parameter to the tests defined below using -// INSTANTIATE_TEST_CASE_P() -// -typedef struct Pagerank_Testparams_t { - std::string graph_file_full_path{}; - double personalization_ratio{0.0}; - bool test_weighted{false}; - - Pagerank_Testparams_t(std::string const& graph_file_path, - double personalization_ratio, - bool test_weighted) - : personalization_ratio(personalization_ratio), test_weighted(test_weighted) - { - if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { - graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; - } else { - graph_file_full_path = graph_file_path; - } - }; -} Pagerank_Testparams_t; - -//////////////////////////////////////////////////////////////////////////////// -// Parameterized test fixture, to be used with TEST_P(). This defines common -// setup and teardown steps as well as common utilities used by each E2E MG -// test. In this case, each test is identical except for the inputs and -// expected outputs, so the entire test is defined in the run_test() method. -// -class Pagerank_E2E_MG_Testfixture_t : public cugraph::test::MG_TestFixture_t, - public ::testing::WithParamInterface { - public: - Pagerank_E2E_MG_Testfixture_t() {} - - // Run once for each test instance - virtual void SetUp() {} - virtual void TearDown() {} - - // Return the results of running pagerank on a single GPU for the dataset in - // graph_file_path. - template - std::vector get_sg_results(raft::handle_t& handle, - const std::string& graph_file_path, - const result_t alpha, - const result_t epsilon) - { - auto graph = - cugraph::test::read_graph_from_matrix_market_file( - handle, graph_file_path, true); // FIXME: should use param.test_weighted instead of true - - auto graph_view = graph.view(); - cudaStream_t stream = handle.get_stream(); - rmm::device_uvector d_pageranks(graph_view.get_number_of_vertices(), stream); - - cugraph::experimental::pagerank( - handle, - graph_view, - static_cast(nullptr), // adj_matrix_row_out_weight_sums - static_cast(nullptr), // personalization_vertices - static_cast(nullptr), // personalization_values - static_cast(0), // personalization_vector_size - d_pageranks.begin(), // pageranks - alpha, // alpha (damping factor) - epsilon, // error tolerance for convergence - std::numeric_limits::max(), // max_iterations - false, // has_initial_guess - true); // do_expensive_check - - std::vector h_pageranks(graph_view.get_number_of_vertices()); - raft::update_host(h_pageranks.data(), d_pageranks.data(), d_pageranks.size(), stream); - - return h_pageranks; - } - - // Compare the results of running pagerank on multiple GPUs to that of a - // single-GPU run for the configuration in param. - template - void run_test(const Pagerank_Testparams_t& param) - { - result_t constexpr alpha{0.85}; - result_t constexpr epsilon{1e-6}; - - raft::handle_t handle; - raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); - const auto& comm = handle.get_comms(); - - cudaStream_t stream = handle.get_stream(); - - // Assuming 2 GPUs which means 1 row, 2 cols. 2 cols = row_comm_size of 2. - // FIXME: DO NOT ASSUME 2 GPUs, add code to compute prows, pcols - size_t row_comm_size{2}; - cugraph::partition_2d::subcomm_factory_t - subcomm_factory(handle, row_comm_size); - - int my_rank = comm.get_rank(); - - // FIXME: graph must be weighted! 
- std::unique_ptr> // store_transposed=true, - // multi_gpu=true - mg_graph_ptr{}; - rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); - - std::tie(mg_graph_ptr, d_renumber_map_labels) = cugraph::test:: - create_graph_for_gpu // store_transposed=true - (handle, param.graph_file_full_path); - - auto mg_graph_view = mg_graph_ptr->view(); - - rmm::device_uvector d_mg_pageranks(mg_graph_view.get_number_of_vertices(), stream); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - - cugraph::experimental::pagerank( - handle, - mg_graph_view, - static_cast(nullptr), // adj_matrix_row_out_weight_sums - static_cast(nullptr), // personalization_vertices - static_cast(nullptr), // personalization_values - static_cast(0), // personalization_vector_size - d_mg_pageranks.begin(), // pageranks - alpha, // alpha (damping factor) - epsilon, // error tolerance for convergence - std::numeric_limits::max(), // max_iterations - false, // has_initial_guess - true); // do_expensive_check - - std::vector h_mg_pageranks(mg_graph_view.get_number_of_vertices()); - - raft::update_host(h_mg_pageranks.data(), d_mg_pageranks.data(), d_mg_pageranks.size(), stream); - - std::vector h_renumber_map_labels(mg_graph_view.get_number_of_vertices()); - raft::update_host(h_renumber_map_labels.data(), - d_renumber_map_labels.data(), - d_renumber_map_labels.size(), - stream); - - // Compare MG to SG - // Each GPU will have pagerank values for their range, so ech GPU must - // compare to specific SG results for their respective range. - - auto h_sg_pageranks = get_sg_results( - handle, param.graph_file_full_path, alpha, epsilon); - - // For this test, each GPU will have the full set of vertices and - // therefore the pageranks vectors should be equal in size. - ASSERT_EQ(h_sg_pageranks.size(), h_mg_pageranks.size()); - - auto threshold_ratio = 1e-3; - auto threshold_magnitude = - (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) * - threshold_ratio; // skip comparison for low PageRank verties (lowly ranked vertices) - auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - return std::abs(lhs - rhs) < - std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); - }; - - vertex_t mapped_vertex{0}; - for (vertex_t i = 0; - i + mg_graph_view.get_local_vertex_first() < mg_graph_view.get_local_vertex_last(); - ++i) { - mapped_vertex = h_renumber_map_labels[i]; - ASSERT_TRUE(nearly_equal(h_mg_pageranks[i], h_sg_pageranks[mapped_vertex])) - << "MG PageRank value for vertex: " << i << " in rank: " << my_rank - << " has value: " << h_mg_pageranks[i] - << " which exceeds the error margin for comparing to SG value: " << h_sg_pageranks[i]; - } - } -}; - -//////////////////////////////////////////////////////////////////////////////// -TEST_P(Pagerank_E2E_MG_Testfixture_t, CheckInt32Int32FloatFloat) -{ - run_test(GetParam()); -} - -INSTANTIATE_TEST_CASE_P( - e2e, - Pagerank_E2E_MG_Testfixture_t, - - // FIXME: the personalization_ratio and use_weighted boo are not used - // (personilization vectors are not used, and all datasets are assumed - // weighted). update this to use personilization vectors and non-weighted - // graphs. - ::testing::Values(Pagerank_Testparams_t("test/datasets/karate.mtx", 0.0, true), - // FIXME: The commented datasets contain isolate vertices - // which result in a different number of vertices in the - // renumbered MG graph (because the renumbering function - // does not include them) vs. 
the SG graph object used for - // the pagerank comparison because the SG graph reads the - // COO as-is without renumbering. Update the utility that - // reads a .mtx and constructs a SG graph object to also - // renumber and return the renumber vertices vector. This - // will result in a comparison of an equal number of - // pagerank values. - // - // Pagerank_Testparams_t("test/datasets/web-Google.mtx", 0.0, true), - // Pagerank_Testparams_t("test/datasets/ljournal-2008.mtx", 0.0, true), - Pagerank_Testparams_t("test/datasets/webbase-1M.mtx", 0.0, true))); - -// FIXME: Enable proper RMM configuration by using CUGRAPH_TEST_PROGRAM_MAIN(). -// Currently seeing a RMM failure during init, need to investigate. -// CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index 3525db73425..e8f11acfbf4 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -32,18 +32,17 @@ namespace cugraph { namespace test { -// FIXME: The BaseFixture class is not used in any tests. This file is only -// needed for the CUGRAPH_TEST_PROGRAM_MAIN macro and the code that it calls, so -// consider removing the BaseFixture class and renaming this file, or moving -// CUGRAPH_TEST_PROGRAM_MAIN to the test_utilities.hpp file and removing this -// file completely. +// FIXME: The BaseFixture class is not used in any tests. This file is only needed for the +// CUGRAPH_TEST_PROGRAM_MAIN macro and the code that it calls, so consider removing the BaseFixture +// class and renaming this file, or moving CUGRAPH_TEST_PROGRAM_MAIN to the test_utilities.hpp file +// and removing this file completely. /** - * @brief Base test fixture class from which all libcudf tests should inherit. + * @brief Base test fixture class from which all libcugraph tests should inherit. * * Example: * ``` - * class MyTestFixture : public cudf::test::BaseFixture {}; + * class MyTestFixture : public cugraph::test::BaseFixture {}; * ``` **/ class BaseFixture : public ::testing::Test { @@ -51,8 +50,8 @@ class BaseFixture : public ::testing::Test { public: /** - * @brief Returns pointer to `device_memory_resource` that should be used for - * all tests inheriting from this fixture + * @brief Returns pointer to `device_memory_resource` that should be used for all tests inheriting + *from this fixture **/ rmm::mr::device_memory_resource *mr() { return _mr; } }; @@ -77,15 +76,14 @@ inline auto make_binning() } /** - * @brief Creates a memory resource for the unit test environment - * given the name of the allocation mode. + * @brief Creates a memory resource for the unit test environment given the name of the allocation + * mode. * - * The returned resource instance must be kept alive for the duration of - * the tests. Attaching the resource to a TestEnvironment causes - * issues since the environment objects are not destroyed until + * The returned resource instance must be kept alive for the duration of the tests. Attaching the + * resource to a TestEnvironment causes issues since the environment objects are not destroyed until * after the runtime is shutdown. * - * @throw cudf::logic_error if the `allocation_mode` is unsupported. + * @throw cugraph::logic_error if the `allocation_mode` is unsupported. * * @param allocation_mode String identifies which resource type. * Accepted types are "pool", "cuda", and "managed" only. 
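[Editorial aside, not part of this patch.] For context on the `create_memory_resource` documentation above: such a helper typically just dispatches on the mode string to the corresponding RMM resource type. A hedged sketch of that dispatch (`example_make_resource` is a hypothetical name; this is not the implementation in the file):

```cpp
// Illustrative sketch of a mode-string -> RMM resource factory; not part of
// this patch.
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/managed_memory_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

#include <cstddef>
#include <memory>
#include <stdexcept>
#include <string>

inline std::shared_ptr<rmm::mr::device_memory_resource> example_make_resource(
  std::string const& allocation_mode)
{
  if (allocation_mode == "cuda") { return std::make_shared<rmm::mr::cuda_memory_resource>(); }
  if (allocation_mode == "managed") { return std::make_shared<rmm::mr::managed_memory_resource>(); }
  if (allocation_mode == "pool") {
    // A pool suballocates from an upstream resource; the upstream must outlive
    // the pool, hence the function-local static here.
    static rmm::mr::cuda_memory_resource upstream{};
    return std::make_shared<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>>(
      &upstream, std::size_t{1} << 27 /* 128 MiB initial pool */);
  }
  throw std::invalid_argument("unsupported allocation_mode: " + allocation_mode);
}
```

The returned resource (and, for the pool, its upstream) must stay alive for as long as it is set as the current device resource, which is why the test `main` below keeps it in scope around `RUN_ALL_TESTS()`.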
@@ -105,17 +103,17 @@ inline std::shared_ptr create_memory_resource( } // namespace cugraph /** - * @brief Parses the cuDF test command line options. + * @brief Parses the cuGraph test command line options. * - * Currently only supports 'rmm_mode' string paramater, which set the rmm - * allocation mode. The default value of the parameter is 'pool'. + * Currently only supports 'rmm_mode' string paramater, which set the rmm allocation mode. The + * default value of the parameter is 'pool'. * * @return Parsing results in the form of cxxopts::ParseResult */ inline auto parse_test_options(int argc, char **argv) { try { - cxxopts::Options options(argv[0], " - cuDF tests command line options"); + cxxopts::Options options(argv[0], " - cuGraph tests command line options"); options.allow_unrecognised_options().add_options()( "rmm_mode", "RMM allocation mode", cxxopts::value()->default_value("pool")); @@ -128,13 +126,11 @@ inline auto parse_test_options(int argc, char **argv) /** * @brief Macro that defines main function for gtest programs that use rmm * - * Should be included in every test program that uses rmm allocators since - * it maintains the lifespan of the rmm default memory resource. - * This `main` function is a wrapper around the google test generated `main`, - * maintaining the original functionality. In addition, this custom `main` - * function parses the command line to customize test behavior, like the - * allocation mode used for creating the default memory resource. - * + * Should be included in every test program that uses rmm allocators since it maintains the lifespan + * of the rmm default memory resource. This `main` function is a wrapper around the google test + * generated `main`, maintaining the original functionality. In addition, this custom `main` + * function parses the command line to customize test behavior, like the allocation mode used for + * creating the default memory resource. */ #define CUGRAPH_TEST_PROGRAM_MAIN() \ int main(int argc, char **argv) \ @@ -146,3 +142,26 @@ inline auto parse_test_options(int argc, char **argv) rmm::mr::set_current_device_resource(resource.get()); \ return RUN_ALL_TESTS(); \ } + +#define CUGRAPH_MG_TEST_PROGRAM_MAIN() \ + int main(int argc, char **argv) \ + { \ + MPI_TRY(MPI_Init(&argc, &argv)); \ + int comm_rank{}; \ + int comm_size{}; \ + MPI_TRY(MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank)); \ + MPI_TRY(MPI_Comm_size(MPI_COMM_WORLD, &comm_size)); \ + int num_gpus{}; \ + CUDA_TRY(cudaGetDeviceCount(&num_gpus)); \ + CUGRAPH_EXPECTS( \ + comm_size <= num_gpus, "# MPI ranks (%d) > # GPUs (%d).", comm_size, num_gpus); \ + CUDA_TRY(cudaSetDevice(comm_rank)); \ + ::testing::InitGoogleTest(&argc, argv); \ + auto const cmd_opts = parse_test_options(argc, argv); \ + auto const rmm_mode = cmd_opts["rmm_mode"].as(); \ + auto resource = cugraph::test::create_memory_resource(rmm_mode); \ + rmm::mr::set_current_device_resource(resource.get()); \ + auto ret = RUN_ALL_TESTS(); \ + MPI_TRY(MPI_Finalize()); \ + return ret; \ + } diff --git a/cpp/tests/utilities/mg_test_utilities.cu b/cpp/tests/utilities/mg_test_utilities.cu deleted file mode 100644 index 26f2450b589..00000000000 --- a/cpp/tests/utilities/mg_test_utilities.cu +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -#include -#include -#include -#include - -namespace cugraph { -namespace test { - -// Given a raft handle and a path to a dataset (must be a .mtx file), returns a -// tuple containing: -// * graph_t instance for the partition accesible from the raft handle -// * vector of indices representing the original unrenumberd vertices -// -// This function creates a graph_t instance appropriate for MG graph -// applications from the edgelist graph data file passed in by filtering out the -// vertices not to be assigned to the GPU in this rank, then renumbering the -// vertices appropriately. The returned vector of vertices contains the original -// vertex IDs, ordered by the new sequential renumbered IDs (this is needed for -// unrenumbering). -template -std::tuple< - std::unique_ptr>, // multi_gpu=true - rmm::device_uvector> -create_graph_for_gpu(raft::handle_t& handle, const std::string& graph_file_path) -{ - const auto& comm = handle.get_comms(); - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - - int my_rank = comm.get_rank(); - - auto edgelist_from_mm = - ::cugraph::test::read_edgelist_from_matrix_market_file( - graph_file_path); - - edge_t total_number_edges = static_cast(edgelist_from_mm.h_rows.size()); - - ////////// - // Copy COO to device - rmm::device_uvector d_edgelist_rows(total_number_edges, handle.get_stream()); - rmm::device_uvector d_edgelist_cols(total_number_edges, handle.get_stream()); - rmm::device_uvector d_edgelist_weights(total_number_edges, handle.get_stream()); - - raft::update_device(d_edgelist_rows.data(), - edgelist_from_mm.h_rows.data(), - total_number_edges, - handle.get_stream()); - raft::update_device(d_edgelist_cols.data(), - edgelist_from_mm.h_cols.data(), - total_number_edges, - handle.get_stream()); - raft::update_device(d_edgelist_weights.data(), - edgelist_from_mm.h_weights.data(), - total_number_edges, - handle.get_stream()); - - ////////// - // Filter out edges that are not to be associated with this rank - // - // Create a edge_gpu_identifier, which will be used by the individual jobs to - // identify if a edge belongs to a particular rank - cugraph::experimental::detail::compute_gpu_id_from_edge_t edge_gpu_identifier{ - false, comm.get_size(), row_comm.get_size(), col_comm.get_size()}; - - auto edgelist_zip_it_begin = thrust::make_zip_iterator(thrust::make_tuple( - d_edgelist_rows.begin(), d_edgelist_cols.begin(), d_edgelist_weights.begin())); - bool is_transposed{store_transposed}; - - // Do the removal - note: remove_if does not delete items, it moves "removed" - // items to the back of the vector and returns the iterator (new_end) that - // represents the items kept. Actual removal of items can be done by - // resizing (see below). 
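[Editorial aside, not part of this patch.] To make the remove-then-resize note above concrete, here is a minimal, self-contained illustration of the same idiom using only Thrust, independent of the cuGraph edge-partitioning types used in this utility:

```cpp
// Sketch of the remove_if + resize/shrink_to_fit idiom described above.
// Illustrative only -- not part of this patch.
#include <thrust/device_vector.h>
#include <thrust/distance.h>
#include <thrust/execution_policy.h>
#include <thrust/remove.h>

struct is_odd {
  __device__ bool operator()(int x) const { return x % 2 != 0; }
};

void keep_even(thrust::device_vector<int>& v)
{
  // remove_if compacts the kept elements to the front and returns an iterator
  // to the new logical end; the vector's size only changes when we resize it.
  auto new_end = thrust::remove_if(thrust::device, v.begin(), v.end(), is_odd{});
  v.resize(thrust::distance(v.begin(), new_end));
  v.shrink_to_fit();  // release the now-unused capacity
}
```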
- auto new_end = thrust::remove_if( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edgelist_zip_it_begin, - edgelist_zip_it_begin + total_number_edges, - [my_rank, is_transposed, edge_gpu_identifier] __device__(auto tup) { - if (is_transposed) { - return (edge_gpu_identifier(thrust::get<1>(tup), thrust::get<0>(tup)) != my_rank); - } else { - return (edge_gpu_identifier(thrust::get<0>(tup), thrust::get<1>(tup)) != my_rank); - } - }); - - edge_t local_number_edges = thrust::distance(edgelist_zip_it_begin, new_end); - // Free the memory used for the items remove_if "removed". This not only - // frees memory, but keeps the actual vector sizes consistent with the data - // being used from this point forward. - d_edgelist_rows.resize(local_number_edges, handle.get_stream()); - d_edgelist_rows.shrink_to_fit(handle.get_stream()); - d_edgelist_cols.resize(local_number_edges, handle.get_stream()); - d_edgelist_cols.shrink_to_fit(handle.get_stream()); - d_edgelist_weights.resize(local_number_edges, handle.get_stream()); - d_edgelist_weights.shrink_to_fit(handle.get_stream()); - - ////////// - // renumber filtered edgelist_from_mm - vertex_t* major_vertices{nullptr}; - vertex_t* minor_vertices{nullptr}; - if (is_transposed) { - major_vertices = d_edgelist_cols.data(); - minor_vertices = d_edgelist_rows.data(); - } else { - major_vertices = d_edgelist_rows.data(); - minor_vertices = d_edgelist_cols.data(); - } - - rmm::device_uvector renumber_map_labels(0, handle.get_stream()); - cugraph::experimental::partition_t partition( - std::vector(comm.get_size() + 1, 0), - false, // is_hypergraph_partitioned() - row_comm.get_size(), - col_comm.get_size(), - row_comm.get_rank(), - col_comm.get_rank()); - vertex_t number_of_vertices{}; - edge_t number_of_edges{}; - std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = - ::cugraph::experimental::renumber_edgelist // multi_gpu=true - (handle, - major_vertices, // edgelist_major_vertices, INOUT of vertex_t* - minor_vertices, // edgelist_minor_vertices, INOUT of vertex_t* - local_number_edges, - false, // is_hypergraph_partitioned - true); // do_expensive_check - - cugraph::experimental::edgelist_t edgelist{ - d_edgelist_rows.data(), d_edgelist_cols.data(), d_edgelist_weights.data(), local_number_edges}; - - std::vector> edgelist_vect; - edgelist_vect.push_back(edgelist); - cugraph::experimental::graph_properties_t properties; - properties.is_symmetric = edgelist_from_mm.is_symmetric; - properties.is_multigraph = false; - - // Finally, create instance of graph_t using filtered & renumbered edgelist - return std::make_tuple( - std::make_unique< - cugraph::experimental::graph_t>( - handle, - edgelist_vect, - partition, - number_of_vertices, - total_number_edges, - properties, - false, // sorted_by_global_degree_within_vertex_partition - true), // do_expensive_check - std::move(renumber_map_labels)); -} - -// explicit instantiation -template std::tuple< - std::unique_ptr< - cugraph::experimental::graph_t>, // store_transposed=true - // multi_gpu=true - rmm::device_uvector> -create_graph_for_gpu(raft::handle_t& handle, const std::string& graph_file_path); - -} // namespace test -} // namespace cugraph diff --git a/cpp/tests/utilities/mg_test_utilities.hpp b/cpp/tests/utilities/mg_test_utilities.hpp deleted file mode 100644 index c23f6c43a6d..00000000000 --- a/cpp/tests/utilities/mg_test_utilities.hpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include - -#include - -#include - -namespace cugraph { -namespace test { - -// Given a raft handle and a path to a dataset (must be a .mtx file), returns a -// tuple containing: -// * graph_t instance for the partition accesible from the raft handle -// * 4-tuple containing renumber info resulting from renumbering the -// edgelist for the partition -template -std::tuple< - std::unique_ptr>, // multi_gpu=true - rmm::device_uvector> -create_graph_for_gpu(raft::handle_t& handle, const std::string& graph_file_path); - -/** - * @brief Base test fixture class, responsible for handling common operations - * needed by all MG tests. - * - * It's expected this class will be built out and refactored often as new MG C++ - * tests are added and new patterns evolve. - * - * Example: - * ``` - * class MyTestFixture : public cugraph::test::MG_TestFixture_t {}; - * ``` - **/ - -// FIXME: consider moving this to a separate file? (eg. mg_test_fixture.cpp)? - -class MG_TestFixture_t : public ::testing::Test { - public: - static void SetUpTestCase() - { - MPI_TRY(MPI_Init(NULL, NULL)); - - int rank, size; - MPI_TRY(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); - MPI_TRY(MPI_Comm_size(MPI_COMM_WORLD, &size)); - - int nGpus; - CUDA_CHECK(cudaGetDeviceCount(&nGpus)); - - ASSERT( - nGpus >= size, "Number of GPUs are lesser than MPI ranks! ngpus=%d, nranks=%d", nGpus, size); - - CUDA_CHECK(cudaSetDevice(rank)); - } - - static void TearDownTestCase() { MPI_TRY(MPI_Finalize()); } -}; - -} // namespace test -} // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.cpp b/cpp/tests/utilities/test_utilities.cpp deleted file mode 100644 index abb416a632d..00000000000 --- a/cpp/tests/utilities/test_utilities.cpp +++ /dev/null @@ -1,442 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include - -#include -#include -#include - -#include - -extern "C" { -#include "mmio.h" -} - -#include - -#include -#include -#include -#include - -namespace cugraph { -namespace test { - -std::string getFileName(const std::string& s) -{ - char sep = '/'; -#ifdef _WIN32 - sep = '\\'; -#endif - size_t i = s.rfind(sep, s.length()); - if (i != std::string::npos) { return (s.substr(i + 1, s.length() - i)); } - return (""); -} - -/// Read matrix properties from Matrix Market file -/** Matrix Market file is assumed to be a sparse matrix in coordinate - * format. 
- * - * @param f File stream for Matrix Market file. - * @param tg Boolean indicating whether to convert matrix to general - * format (from symmetric, Hermitian, or skew symmetric format). - * @param t (Output) MM_typecode with matrix properties. - * @param m (Output) Number of matrix rows. - * @param n (Output) Number of matrix columns. - * @param nnz (Output) Number of non-zero matrix entries. - * @return Zero if properties were read successfully. Otherwise - * non-zero. - */ -template -int mm_properties(FILE* f, int tg, MM_typecode* t, IndexType_* m, IndexType_* n, IndexType_* nnz) -{ - // Read matrix properties from file - int mint, nint, nnzint; - if (fseek(f, 0, SEEK_SET)) { - fprintf(stderr, "Error: could not set position in file\n"); - return -1; - } - if (mm_read_banner(f, t)) { - fprintf(stderr, "Error: could not read Matrix Market file banner\n"); - return -1; - } - if (!mm_is_matrix(*t) || !mm_is_coordinate(*t)) { - fprintf(stderr, "Error: file does not contain matrix in coordinate format\n"); - return -1; - } - if (mm_read_mtx_crd_size(f, &mint, &nint, &nnzint)) { - fprintf(stderr, "Error: could not read matrix dimensions\n"); - return -1; - } - if (!mm_is_pattern(*t) && !mm_is_real(*t) && !mm_is_integer(*t) && !mm_is_complex(*t)) { - fprintf(stderr, "Error: matrix entries are not valid type\n"); - return -1; - } - *m = mint; - *n = nint; - *nnz = nnzint; - - // Find total number of non-zero entries - if (tg && !mm_is_general(*t)) { - // Non-diagonal entries should be counted twice - *nnz *= 2; - - // Diagonal entries should not be double-counted - int st; - for (int i = 0; i < nnzint; ++i) { - // Read matrix entry - // MTX only supports int for row and col idx - int row, col; - double rval, ival; - if (mm_is_pattern(*t)) - st = fscanf(f, "%d %d\n", &row, &col); - else if (mm_is_real(*t) || mm_is_integer(*t)) - st = fscanf(f, "%d %d %lg\n", &row, &col, &rval); - else // Complex matrix - st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival); - if (ferror(f) || (st == EOF)) { - fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1); - return -1; - } - - // Check if entry is diagonal - if (row == col) --(*nnz); - } - } - - return 0; -} - -/// Read Matrix Market file and convert to COO format matrix -/** Matrix Market file is assumed to be a sparse matrix in coordinate - * format. - * - * @param f File stream for Matrix Market file. - * @param tg Boolean indicating whether to convert matrix to general - * format (from symmetric, Hermitian, or skew symmetric format). - * @param nnz Number of non-zero matrix entries. - * @param cooRowInd (Output) Row indices for COO matrix. Should have - * at least nnz entries. - * @param cooColInd (Output) Column indices for COO matrix. Should - * have at least nnz entries. - * @param cooRVal (Output) Real component of COO matrix - * entries. Should have at least nnz entries. Ignored if null - * pointer. - * @param cooIVal (Output) Imaginary component of COO matrix - * entries. Should have at least nnz entries. Ignored if null - * pointer. - * @return Zero if matrix was read successfully. Otherwise non-zero. 
- */ -template -int mm_to_coo(FILE* f, - int tg, - IndexType_ nnz, - IndexType_* cooRowInd, - IndexType_* cooColInd, - ValueType_* cooRVal, - ValueType_* cooIVal) -{ - // Read matrix properties from file - MM_typecode t; - int m, n, nnzOld; - if (fseek(f, 0, SEEK_SET)) { - fprintf(stderr, "Error: could not set position in file\n"); - return -1; - } - if (mm_read_banner(f, &t)) { - fprintf(stderr, "Error: could not read Matrix Market file banner\n"); - return -1; - } - if (!mm_is_matrix(t) || !mm_is_coordinate(t)) { - fprintf(stderr, "Error: file does not contain matrix in coordinate format\n"); - return -1; - } - if (mm_read_mtx_crd_size(f, &m, &n, &nnzOld)) { - fprintf(stderr, "Error: could not read matrix dimensions\n"); - return -1; - } - if (!mm_is_pattern(t) && !mm_is_real(t) && !mm_is_integer(t) && !mm_is_complex(t)) { - fprintf(stderr, "Error: matrix entries are not valid type\n"); - return -1; - } - - // Add each matrix entry in file to COO format matrix - int i; // Entry index in Matrix Market file; can only be int in the MTX format - int j = 0; // Entry index in COO format matrix; can only be int in the MTX format - for (i = 0; i < nnzOld; ++i) { - // Read entry from file - int row, col; - double rval, ival; - int st; - if (mm_is_pattern(t)) { - st = fscanf(f, "%d %d\n", &row, &col); - rval = 1.0; - ival = 0.0; - } else if (mm_is_real(t) || mm_is_integer(t)) { - st = fscanf(f, "%d %d %lg\n", &row, &col, &rval); - ival = 0.0; - } else // Complex matrix - st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival); - if (ferror(f) || (st == EOF)) { - fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1); - return -1; - } - - // Switch to 0-based indexing - --row; - --col; - - // Record entry - cooRowInd[j] = row; - cooColInd[j] = col; - if (cooRVal != NULL) cooRVal[j] = rval; - if (cooIVal != NULL) cooIVal[j] = ival; - ++j; - - // Add symmetric complement of non-diagonal entries - if (tg && !mm_is_general(t) && (row != col)) { - // Modify entry value if matrix is skew symmetric or Hermitian - if (mm_is_skew(t)) { - rval = -rval; - ival = -ival; - } else if (mm_is_hermitian(t)) { - ival = -ival; - } - - // Record entry - cooRowInd[j] = col; - cooColInd[j] = row; - if (cooRVal != NULL) cooRVal[j] = rval; - if (cooIVal != NULL) cooIVal[j] = ival; - ++j; - } - } - return 0; -} - -int read_binary_vector(FILE* fpin, int n, std::vector& val) -{ - size_t is_read1; - - double* t_storage = new double[n]; - is_read1 = fread(t_storage, sizeof(double), n, fpin); - for (int i = 0; i < n; i++) { - if (t_storage[i] == DBL_MAX) - val[i] = FLT_MAX; - else if (t_storage[i] == -DBL_MAX) - val[i] = -FLT_MAX; - else - val[i] = static_cast(t_storage[i]); - } - delete[] t_storage; - - if (is_read1 != (size_t)n) { - printf("%s", "I/O fail\n"); - return 1; - } - return 0; -} - -int read_binary_vector(FILE* fpin, int n, std::vector& val) -{ - size_t is_read1; - - is_read1 = fread(&val[0], sizeof(double), n, fpin); - - if (is_read1 != (size_t)n) { - printf("%s", "I/O fail\n"); - return 1; - } - return 0; -} - -// FIXME: A similar function could be useful for CSC format -// There are functions above that operate coo -> csr and coo->csc -/** - * @tparam - */ -template -std::unique_ptr> generate_graph_csr_from_mm( - bool& directed, std::string mm_file) -{ - vertex_t number_of_vertices; - edge_t number_of_edges; - - FILE* fpin = fopen(mm_file.c_str(), "r"); - EXPECT_NE(fpin, nullptr); - - vertex_t number_of_columns = 0; - MM_typecode mm_typecode{0}; - EXPECT_EQ(mm_properties( - 
fpin, 1, &mm_typecode, &number_of_vertices, &number_of_columns, &number_of_edges), - 0); - EXPECT_TRUE(mm_is_matrix(mm_typecode)); - EXPECT_TRUE(mm_is_coordinate(mm_typecode)); - EXPECT_FALSE(mm_is_complex(mm_typecode)); - EXPECT_FALSE(mm_is_skew(mm_typecode)); - - directed = !mm_is_symmetric(mm_typecode); - - // Allocate memory on host - std::vector coo_row_ind(number_of_edges); - std::vector coo_col_ind(number_of_edges); - std::vector coo_val(number_of_edges); - - // Read - EXPECT_EQ((mm_to_coo( - fpin, 1, number_of_edges, &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], NULL)), - 0); - EXPECT_EQ(fclose(fpin), 0); - - cugraph::GraphCOOView cooview( - &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], number_of_vertices, number_of_edges); - - return cugraph::coo_to_csr(cooview); -} - -template -edgelist_from_market_matrix_file_t read_edgelist_from_matrix_market_file( - std::string const& graph_file_full_path) -{ - edgelist_from_market_matrix_file_t ret{}; - - MM_typecode mc{}; - vertex_t m{}; - edge_t nnz{}; - - FILE* file = fopen(graph_file_full_path.c_str(), "r"); - CUGRAPH_EXPECTS(file != nullptr, "fopen failure."); - - edge_t tmp_m{}; - edge_t tmp_k{}; - auto mm_ret = cugraph::test::mm_properties(file, 1, &mc, &tmp_m, &tmp_k, &nnz); - CUGRAPH_EXPECTS(mm_ret == 0, "could not read Matrix Market file properties."); - m = static_cast(tmp_m); - CUGRAPH_EXPECTS(mm_is_matrix(mc) && mm_is_coordinate(mc) && !mm_is_complex(mc) && !mm_is_skew(mc), - "invalid Matrix Market file properties."); - - ret.h_rows.assign(nnz, vertex_t{0}); - ret.h_cols.assign(nnz, vertex_t{0}); - ret.h_weights.assign(nnz, weight_t{0.0}); - ret.number_of_vertices = m; - ret.is_symmetric = mm_is_symmetric(mc); - - mm_ret = cugraph::test::mm_to_coo( - file, 1, nnz, ret.h_rows.data(), ret.h_cols.data(), ret.h_weights.data(), nullptr); - CUGRAPH_EXPECTS(mm_ret == 0, "could not read matrix data"); - - auto file_ret = fclose(file); - CUGRAPH_EXPECTS(file_ret == 0, "fclose failure."); - - return std::move(ret); -} - -template -cugraph::experimental::graph_t -read_graph_from_matrix_market_file(raft::handle_t const& handle, - std::string const& graph_file_full_path, - bool test_weighted) -{ - auto mm_graph = - read_edgelist_from_matrix_market_file(graph_file_full_path); - edge_t number_of_edges = static_cast(mm_graph.h_rows.size()); - - rmm::device_uvector d_edgelist_rows(number_of_edges, handle.get_stream()); - rmm::device_uvector d_edgelist_cols(number_of_edges, handle.get_stream()); - rmm::device_uvector d_edgelist_weights(test_weighted ? number_of_edges : 0, - handle.get_stream()); - - raft::update_device( - d_edgelist_rows.data(), mm_graph.h_rows.data(), number_of_edges, handle.get_stream()); - raft::update_device( - d_edgelist_cols.data(), mm_graph.h_cols.data(), number_of_edges, handle.get_stream()); - if (test_weighted) { - raft::update_device( - d_edgelist_weights.data(), mm_graph.h_weights.data(), number_of_edges, handle.get_stream()); - } - - cugraph::experimental::edgelist_t edgelist{ - d_edgelist_rows.data(), - d_edgelist_cols.data(), - test_weighted ? 
d_edgelist_weights.data() : nullptr, - number_of_edges}; - - return cugraph::experimental::graph_t( - handle, - edgelist, - mm_graph.number_of_vertices, - cugraph::experimental::graph_properties_t{mm_graph.is_symmetric, false}, - false, - true); -} - -// explicit instantiations - -template int mm_to_coo( - FILE* f, int tg, int nnz, int* cooRowInd, int* cooColInd, int* cooRVal, int* cooIVal); - -template int mm_to_coo( - FILE* f, int tg, int nnz, int* cooRowInd, int* cooColInd, double* cooRVal, double* cooIVal); - -template int mm_to_coo( - FILE* f, int tg, int nnz, int* cooRowInd, int* cooColInd, float* cooRVal, float* cooIVal); - -template std::unique_ptr> -generate_graph_csr_from_mm(bool& directed, std::string mm_file); - -template std::unique_ptr> generate_graph_csr_from_mm( - bool& directed, std::string mm_file); - -template std::unique_ptr> generate_graph_csr_from_mm( - bool& directed, std::string mm_file); - -template std::unique_ptr> generate_graph_csr_from_mm( - bool& directed, std::string mm_file); - -template cugraph::experimental::graph_t -read_graph_from_matrix_market_file(raft::handle_t const& handle, - std::string const& graph_file_full_path, - bool test_weighted); - -template cugraph::experimental::graph_t -read_graph_from_matrix_market_file(raft::handle_t const& handle, - std::string const& graph_file_full_path, - bool test_weighted); - -template cugraph::experimental::graph_t -read_graph_from_matrix_market_file(raft::handle_t const& handle, - std::string const& graph_file_full_path, - bool test_weighted); - -template cugraph::experimental::graph_t -read_graph_from_matrix_market_file( - raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted); - -template cugraph::experimental::graph_t -read_graph_from_matrix_market_file(raft::handle_t const& handle, - std::string const& graph_file_full_path, - bool test_weighted); - -template cugraph::experimental::graph_t -read_graph_from_matrix_market_file( - raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted); - -template cugraph::experimental::graph_t -read_graph_from_matrix_market_file( - raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted); - -} // namespace test -} // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.cu b/cpp/tests/utilities/test_utilities.cu new file mode 100644 index 00000000000..0a7b58b32cd --- /dev/null +++ b/cpp/tests/utilities/test_utilities.cu @@ -0,0 +1,788 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +extern "C" { +#include "mmio.h" +} + +#include +#include +#include +#include + +namespace cugraph { +namespace test { + +std::string getFileName(const std::string& s) +{ + char sep = '/'; +#ifdef _WIN32 + sep = '\\'; +#endif + size_t i = s.rfind(sep, s.length()); + if (i != std::string::npos) { return (s.substr(i + 1, s.length() - i)); } + return (""); +} + +/// Read matrix properties from Matrix Market file +/** Matrix Market file is assumed to be a sparse matrix in coordinate + * format. + * + * @param f File stream for Matrix Market file. + * @param tg Boolean indicating whether to convert matrix to general + * format (from symmetric, Hermitian, or skew symmetric format). + * @param t (Output) MM_typecode with matrix properties. + * @param m (Output) Number of matrix rows. + * @param n (Output) Number of matrix columns. + * @param nnz (Output) Number of non-zero matrix entries. + * @return Zero if properties were read successfully. Otherwise + * non-zero. + */ +template +int mm_properties(FILE* f, int tg, MM_typecode* t, IndexType_* m, IndexType_* n, IndexType_* nnz) +{ + // Read matrix properties from file + int mint, nint, nnzint; + if (fseek(f, 0, SEEK_SET)) { + fprintf(stderr, "Error: could not set position in file\n"); + return -1; + } + if (mm_read_banner(f, t)) { + fprintf(stderr, "Error: could not read Matrix Market file banner\n"); + return -1; + } + if (!mm_is_matrix(*t) || !mm_is_coordinate(*t)) { + fprintf(stderr, "Error: file does not contain matrix in coordinate format\n"); + return -1; + } + if (mm_read_mtx_crd_size(f, &mint, &nint, &nnzint)) { + fprintf(stderr, "Error: could not read matrix dimensions\n"); + return -1; + } + if (!mm_is_pattern(*t) && !mm_is_real(*t) && !mm_is_integer(*t) && !mm_is_complex(*t)) { + fprintf(stderr, "Error: matrix entries are not valid type\n"); + return -1; + } + *m = mint; + *n = nint; + *nnz = nnzint; + + // Find total number of non-zero entries + if (tg && !mm_is_general(*t)) { + // Non-diagonal entries should be counted twice + *nnz *= 2; + + // Diagonal entries should not be double-counted + int st; + for (int i = 0; i < nnzint; ++i) { + // Read matrix entry + // MTX only supports int for row and col idx + int row, col; + double rval, ival; + if (mm_is_pattern(*t)) + st = fscanf(f, "%d %d\n", &row, &col); + else if (mm_is_real(*t) || mm_is_integer(*t)) + st = fscanf(f, "%d %d %lg\n", &row, &col, &rval); + else // Complex matrix + st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival); + if (ferror(f) || (st == EOF)) { + fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1); + return -1; + } + + // Check if entry is diagonal + if (row == col) --(*nnz); + } + } + + return 0; +} + +/// Read Matrix Market file and convert to COO format matrix +/** Matrix Market file is assumed to be a sparse matrix in coordinate + * format. + * + * @param f File stream for Matrix Market file. + * @param tg Boolean indicating whether to convert matrix to general + * format (from symmetric, Hermitian, or skew symmetric format). + * @param nnz Number of non-zero matrix entries. + * @param cooRowInd (Output) Row indices for COO matrix. Should have + * at least nnz entries. + * @param cooColInd (Output) Column indices for COO matrix. Should + * have at least nnz entries. + * @param cooRVal (Output) Real component of COO matrix + * entries. Should have at least nnz entries. 
Ignored if null + * pointer. + * @param cooIVal (Output) Imaginary component of COO matrix + * entries. Should have at least nnz entries. Ignored if null + * pointer. + * @return Zero if matrix was read successfully. Otherwise non-zero. + */ +template +int mm_to_coo(FILE* f, + int tg, + IndexType_ nnz, + IndexType_* cooRowInd, + IndexType_* cooColInd, + ValueType_* cooRVal, + ValueType_* cooIVal) +{ + // Read matrix properties from file + MM_typecode t; + int m, n, nnzOld; + if (fseek(f, 0, SEEK_SET)) { + fprintf(stderr, "Error: could not set position in file\n"); + return -1; + } + if (mm_read_banner(f, &t)) { + fprintf(stderr, "Error: could not read Matrix Market file banner\n"); + return -1; + } + if (!mm_is_matrix(t) || !mm_is_coordinate(t)) { + fprintf(stderr, "Error: file does not contain matrix in coordinate format\n"); + return -1; + } + if (mm_read_mtx_crd_size(f, &m, &n, &nnzOld)) { + fprintf(stderr, "Error: could not read matrix dimensions\n"); + return -1; + } + if (!mm_is_pattern(t) && !mm_is_real(t) && !mm_is_integer(t) && !mm_is_complex(t)) { + fprintf(stderr, "Error: matrix entries are not valid type\n"); + return -1; + } + + // Add each matrix entry in file to COO format matrix + int i; // Entry index in Matrix Market file; can only be int in the MTX format + int j = 0; // Entry index in COO format matrix; can only be int in the MTX format + for (i = 0; i < nnzOld; ++i) { + // Read entry from file + int row, col; + double rval, ival; + int st; + if (mm_is_pattern(t)) { + st = fscanf(f, "%d %d\n", &row, &col); + rval = 1.0; + ival = 0.0; + } else if (mm_is_real(t) || mm_is_integer(t)) { + st = fscanf(f, "%d %d %lg\n", &row, &col, &rval); + ival = 0.0; + } else // Complex matrix + st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival); + if (ferror(f) || (st == EOF)) { + fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1); + return -1; + } + + // Switch to 0-based indexing + --row; + --col; + + // Record entry + cooRowInd[j] = row; + cooColInd[j] = col; + if (cooRVal != NULL) cooRVal[j] = rval; + if (cooIVal != NULL) cooIVal[j] = ival; + ++j; + + // Add symmetric complement of non-diagonal entries + if (tg && !mm_is_general(t) && (row != col)) { + // Modify entry value if matrix is skew symmetric or Hermitian + if (mm_is_skew(t)) { + rval = -rval; + ival = -ival; + } else if (mm_is_hermitian(t)) { + ival = -ival; + } + + // Record entry + cooRowInd[j] = col; + cooColInd[j] = row; + if (cooRVal != NULL) cooRVal[j] = rval; + if (cooIVal != NULL) cooIVal[j] = ival; + ++j; + } + } + return 0; +} + +// FIXME: A similar function could be useful for CSC format +// There are functions above that operate coo -> csr and coo->csc +/** + * @tparam + */ +template +std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file) +{ + vertex_t number_of_vertices; + edge_t number_of_edges; + + FILE* fpin = fopen(mm_file.c_str(), "r"); + CUGRAPH_EXPECTS(fpin != nullptr, "fopen (%s) failure.", mm_file.c_str()); + + vertex_t number_of_columns = 0; + MM_typecode mm_typecode{0}; + CUGRAPH_EXPECTS( + mm_properties( + fpin, 1, &mm_typecode, &number_of_vertices, &number_of_columns, &number_of_edges) == 0, + "mm_properties query failure."); + CUGRAPH_EXPECTS(mm_is_matrix(mm_typecode), "Invalid input file."); + CUGRAPH_EXPECTS(mm_is_coordinate(mm_typecode), "Invalid input file."); + CUGRAPH_EXPECTS(!mm_is_complex(mm_typecode), "Invalid input file."); + CUGRAPH_EXPECTS(!mm_is_skew(mm_typecode), "Invalid input file."); + + directed = 
!mm_is_symmetric(mm_typecode); + + // Allocate memory on host + std::vector coo_row_ind(number_of_edges); + std::vector coo_col_ind(number_of_edges); + std::vector coo_val(number_of_edges); + + // Read + CUGRAPH_EXPECTS( + (mm_to_coo( + fpin, 1, number_of_edges, &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], NULL)) == 0, + "file read failure."); + CUGRAPH_EXPECTS(fclose(fpin) == 0, "fclose failure."); + + cugraph::GraphCOOView cooview( + &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], number_of_vertices, number_of_edges); + + return cugraph::coo_to_csr(cooview); +} + +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> +read_edgelist_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted) +{ + MM_typecode mc{}; + vertex_t m{}; + size_t nnz{}; + + FILE* file = fopen(graph_file_full_path.c_str(), "r"); + CUGRAPH_EXPECTS(file != nullptr, "fopen failure."); + + size_t tmp_m{}; + size_t tmp_k{}; + auto mm_ret = cugraph::test::mm_properties(file, 1, &mc, &tmp_m, &tmp_k, &nnz); + CUGRAPH_EXPECTS(mm_ret == 0, "could not read Matrix Market file properties."); + m = static_cast(tmp_m); + CUGRAPH_EXPECTS(mm_is_matrix(mc) && mm_is_coordinate(mc) && !mm_is_complex(mc) && !mm_is_skew(mc), + "invalid Matrix Market file properties."); + + vertex_t number_of_vertices = m; + bool is_symmetric = mm_is_symmetric(mc); + + std::vector h_rows(nnz); + std::vector h_cols(nnz); + std::vector h_weights(nnz); + + mm_ret = cugraph::test::mm_to_coo( + file, 1, nnz, h_rows.data(), h_cols.data(), h_weights.data(), static_cast(nullptr)); + CUGRAPH_EXPECTS(mm_ret == 0, "could not read matrix data"); + + auto file_ret = fclose(file); + CUGRAPH_EXPECTS(file_ret == 0, "fclose failure."); + + rmm::device_uvector d_edgelist_rows(h_rows.size(), handle.get_stream()); + rmm::device_uvector d_edgelist_cols(h_cols.size(), handle.get_stream()); + rmm::device_uvector d_edgelist_weights(test_weighted ? h_weights.size() : size_t{0}, + handle.get_stream()); + + raft::update_device(d_edgelist_rows.data(), h_rows.data(), h_rows.size(), handle.get_stream()); + raft::update_device(d_edgelist_cols.data(), h_cols.data(), h_cols.size(), handle.get_stream()); + if (test_weighted) { + raft::update_device( + d_edgelist_weights.data(), h_weights.data(), h_weights.size(), handle.get_stream()); + } + + return std::make_tuple(std::move(d_edgelist_rows), + std::move(d_edgelist_cols), + std::move(d_edgelist_weights), + number_of_vertices, + is_symmetric); +} + +namespace detail { + +template +std::enable_if_t< + multi_gpu, + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector>> +read_graph_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber) +{ + CUGRAPH_EXPECTS(renumber, "renumber should be true if multi_gpu is true."); + + // 1. 
read from the matrix market file + + rmm::device_uvector d_edgelist_rows(0, handle.get_stream()); + rmm::device_uvector d_edgelist_cols(0, handle.get_stream()); + rmm::device_uvector d_edgelist_weights(0, handle.get_stream()); + vertex_t number_of_vertices{}; + bool is_symmetric{}; + std::tie(d_edgelist_rows, d_edgelist_cols, d_edgelist_weights, number_of_vertices, is_symmetric) = + read_edgelist_from_matrix_market_file( + handle, graph_file_full_path, test_weighted); + + rmm::device_uvector d_vertices(number_of_vertices, handle.get_stream()); + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_vertices.begin(), + d_vertices.end(), + vertex_t{0}); + + // 2. filter non-local vertices & edges + + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + + auto vertex_key_func = + cugraph::experimental::detail::compute_gpu_id_from_vertex_t{comm_size}; + d_vertices.resize( + thrust::distance( + d_vertices.begin(), + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_vertices.begin(), + d_vertices.end(), + [comm_rank, key_func = vertex_key_func] __device__(auto val) { + return key_func(val) != comm_rank; + })), + handle.get_stream()); + d_vertices.shrink_to_fit(handle.get_stream()); + + auto edge_key_func = cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + false, comm_size, row_comm_size, col_comm_size}; + size_t number_of_local_edges{}; + if (test_weighted) { + auto edge_first = thrust::make_zip_iterator(thrust::make_tuple( + d_edgelist_rows.begin(), d_edgelist_cols.begin(), d_edgelist_weights.begin())); + number_of_local_edges = thrust::distance( + edge_first, + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + d_edgelist_rows.size(), + [comm_rank, key_func = edge_key_func] __device__(auto e) { + auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); + auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); + return key_func(major, minor) != comm_rank; + })); + } else { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(d_edgelist_rows.begin(), d_edgelist_cols.begin())); + number_of_local_edges = thrust::distance( + edge_first, + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + d_edgelist_rows.size(), + [comm_rank, key_func = edge_key_func] __device__(auto e) { + auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); + auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); + return key_func(major, minor) != comm_rank; + })); + } + + d_edgelist_rows.resize(number_of_local_edges, handle.get_stream()); + d_edgelist_rows.shrink_to_fit(handle.get_stream()); + d_edgelist_cols.resize(number_of_local_edges, handle.get_stream()); + d_edgelist_cols.shrink_to_fit(handle.get_stream()); + if (test_weighted) { + d_edgelist_weights.resize(number_of_local_edges, handle.get_stream()); + d_edgelist_weights.shrink_to_fit(handle.get_stream()); + } + + // 3. 
renumber + + rmm::device_uvector renumber_map_labels(0, handle.get_stream()); + cugraph::experimental::partition_t partition{}; + vertex_t aggregate_number_of_vertices{}; + edge_t number_of_edges{}; + // FIXME: set do_expensive_check to false once validated + std::tie(renumber_map_labels, partition, aggregate_number_of_vertices, number_of_edges) = + cugraph::experimental::renumber_edgelist( + handle, + d_vertices.data(), + static_cast(d_vertices.size()), + store_transposed ? d_edgelist_cols.data() : d_edgelist_rows.data(), + store_transposed ? d_edgelist_rows.data() : d_edgelist_cols.data(), + d_edgelist_rows.size(), + false, + true); + assert(aggregate_number_of_vertices == number_of_vertices); + + // 4. create a graph + + return std::make_tuple( + cugraph::experimental::graph_t( + handle, + std::vector>{ + cugraph::experimental::edgelist_t{ + d_edgelist_rows.data(), + d_edgelist_cols.data(), + test_weighted ? d_edgelist_weights.data() : nullptr, + static_cast(d_edgelist_rows.size())}}, + partition, + number_of_vertices, + number_of_edges, + cugraph::experimental::graph_properties_t{is_symmetric, false}, + true, + true), + std::move(renumber_map_labels)); +} + +template +std::enable_if_t< + !multi_gpu, + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector>> +read_graph_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber) +{ + rmm::device_uvector d_edgelist_rows(0, handle.get_stream()); + rmm::device_uvector d_edgelist_cols(0, handle.get_stream()); + rmm::device_uvector d_edgelist_weights(0, handle.get_stream()); + vertex_t number_of_vertices{}; + bool is_symmetric{}; + std::tie(d_edgelist_rows, d_edgelist_cols, d_edgelist_weights, number_of_vertices, is_symmetric) = + read_edgelist_from_matrix_market_file( + handle, graph_file_full_path, test_weighted); + + rmm::device_uvector d_vertices(number_of_vertices, handle.get_stream()); + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_vertices.begin(), + d_vertices.end(), + vertex_t{0}); + + // FIXME: set do_expensive_check to false once validated + auto renumber_map_labels = + renumber ? cugraph::experimental::renumber_edgelist( + handle, + d_vertices.data(), + static_cast(d_vertices.size()), + store_transposed ? d_edgelist_cols.data() : d_edgelist_rows.data(), + store_transposed ? d_edgelist_rows.data() : d_edgelist_cols.data(), + static_cast(d_edgelist_rows.size()), + true) + : rmm::device_uvector(0, handle.get_stream()); + + // FIXME: set do_expensive_check to false once validated + return std::make_tuple( + cugraph::experimental::graph_t( + handle, + cugraph::experimental::edgelist_t{ + d_edgelist_rows.data(), + d_edgelist_cols.data(), + test_weighted ? d_edgelist_weights.data() : nullptr, + static_cast(d_edgelist_rows.size())}, + number_of_vertices, + cugraph::experimental::graph_properties_t{is_symmetric, false}, + renumber ? 
true : false, + true), + std::move(renumber_map_labels)); +} + +} // namespace detail + +template +std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber) +{ + return detail:: + read_graph_from_matrix_market_file( + handle, graph_file_full_path, test_weighted, renumber); +} +// explicit instantiations + +template int32_t mm_to_coo(FILE* f, + int32_t tg, + int32_t nnz, + int32_t* cooRowInd, + int32_t* cooColInd, + int32_t* cooRVal, + int32_t* cooIVal); + +template int32_t mm_to_coo(FILE* f, + int32_t tg, + int32_t nnz, + int32_t* cooRowInd, + int32_t* cooColInd, + double* cooRVal, + double* cooIVal); + +template int32_t mm_to_coo(FILE* f, + int32_t tg, + int32_t nnz, + int32_t* cooRowInd, + int32_t* cooColInd, + float* cooRVal, + float* cooIVal); + +template std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file); + +template std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file); + +template std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file); + +template std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + 
raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +read_graph_from_matrix_market_file( + raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted, + bool renumber); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 406f09048e0..4b5517271f5 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -18,6 +18,8 @@ #include #include +#include + #include #include #include @@ -77,10 +79,6 @@ int mm_to_coo(FILE* f, ValueType_* cooRVal, ValueType_* cooIVal); -int read_binary_vector(FILE* fpin, int n, std::vector& val); - -int read_binary_vector(FILE* fpin, int n, std::vector& val); - // FIXME: A similar function could be useful for CSC format // There are functions above that operate coo -> csr and coo->csc /** @@ -108,24 +106,29 @@ static const std::string& get_rapids_dataset_root_dir() return rdrd; } +// returns a tuple of (rows, columns, weights, number_of_vertices, is_symmetric) template -struct edgelist_from_market_matrix_file_t { - std::vector h_rows{}; - std::vector h_cols{}; - std::vector h_weights{}; - vertex_t number_of_vertices{}; - bool is_symmetric{}; -}; - 
-template -edgelist_from_market_matrix_file_t read_edgelist_from_matrix_market_file( - std::string const& graph_file_full_path); - -template -cugraph::experimental::graph_t +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> +read_edgelist_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted); + +// renumber must be true if multi_gpu is true +template +std::tuple, + rmm::device_uvector> read_graph_from_matrix_market_file(raft::handle_t const& handle, std::string const& graph_file_full_path, - bool test_weighted); + bool test_weighted, + bool renumber); } // namespace test } // namespace cugraph From 860bc159634df9b963dd75fed3fc9a3f86ddc6e9 Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Mon, 1 Mar 2021 15:36:33 -0500 Subject: [PATCH 09/51] prepare changelog (#1433) Prepare Changelog for Automation (#1433) This PR prepares the changelog to be automatically updated during releases. The contents of the pre-release body linked in this PR will be copied into CHANGELOG.md at release time. Authors: - AJ Schmidt (@ajschmidt8) Approvers: - Dillon Cullinan (@dillon-cullinan) URL: https://github.com/rapidsai/cugraph/pull/1433 --- CHANGELOG.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e58f3b9aa07..fe8e09f1e52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,6 @@ # cuGraph 0.19.0 (Date TBD) -## New Features - -## Improvements - -## Bug Fixes +Please see https://github.com/rapidsai/cugraph/releases/tag/branch-0.19-latest for the latest changes to this development branch. # cuGraph 0.18.0 (24 Feb 2021) From 07f3d71feb513298a149f282eb84ea46bc2296f9 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Tue, 2 Mar 2021 17:23:03 -0500 Subject: [PATCH 10/51] Add R-mat generator (#1411) Close #1329 (with https://github.com/rapidsai/cugraph/pull/1401) Authors: - Seunghwa Kang (@seunghwak) Approvers: - Brad Rees (@BradReesWork) - Alex Fender (@afender) - Andrei Schaffer (@aschaffer) URL: https://github.com/rapidsai/cugraph/pull/1411 --- cpp/CMakeLists.txt | 1 + cpp/include/experimental/graph_generator.hpp | 84 ++++++ .../experimental/generate_rmat_edgelist.cu | 149 +++++++++ cpp/src/experimental/scramble.cuh | 82 +++++ cpp/tests/CMakeLists.txt | 12 +- cpp/tests/experimental/coarsen_graph_test.cpp | 18 +- cpp/tests/experimental/degree_test.cpp | 8 +- cpp/tests/experimental/generate_rmat_test.cpp | 285 ++++++++++++++++++ cpp/tests/experimental/weight_sum_test.cpp | 8 +- cpp/tests/utilities/test_utilities.hpp | 14 + 10 files changed, 635 insertions(+), 26 deletions(-) create mode 100644 cpp/include/experimental/graph_generator.hpp create mode 100644 cpp/src/experimental/generate_rmat_edgelist.cu create mode 100644 cpp/src/experimental/scramble.cuh create mode 100644 cpp/tests/experimental/generate_rmat_test.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d211fe9ed5a..108cb0748a8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -420,6 +420,7 @@ add_library(cugraph SHARED src/components/connectivity.cu src/centrality/katz_centrality.cu src/centrality/betweenness_centrality.cu + src/experimental/generate_rmat_edgelist.cu src/experimental/graph.cu src/experimental/graph_view.cu src/experimental/coarsen_graph.cu diff --git a/cpp/include/experimental/graph_generator.hpp b/cpp/include/experimental/graph_generator.hpp new file mode 100644 index 00000000000..b8495ed7581 --- /dev/null +++ 
b/cpp/include/experimental/graph_generator.hpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +/** + * @brief generate an edge list for an R-mat graph. + * + * This function allows multi-edges and self-loops similar to the Graph 500 reference + * implementation. + * + * @p scramble_vertex_ids needs to be set to `true` to generate a graph conforming to the Graph 500 + * specification (note that scrambling does not affect cuGraph's graph construction performance, so + * this is generally unnecessary). If `edge_factor` is given (e.g. Graph 500), set @p num_edges to + * (size_t{1} << @p scale) * `edge_factor`. To generate an undirected graph, set @p b == @p c and @p + * clip_and_flip = true. All the resulting edges will be placed in the lower triangular part + * (inculding the diagonal) of the graph adjacency matrix. + * + * For multi-GPU generation with `P` GPUs, @p seed should be set to different values in different + * GPUs to avoid every GPU generating the same set of edges. @p num_edges should be adjusted as + * well; e.g. assuming `edge_factor` is given, set @p num_edges = (size_t{1} << @p scale) * + * `edge_factor` / `P` + (rank < (((size_t{1} << @p scale) * `edge_factor`) % P) ? 1 : 0). + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param scale Scale factor to set the number of verties in the graph. Vertex IDs have values in + * [0, V), where V = 1 << @p scale. + * @param num_edges Number of edges to generate. + * @param a a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (vist https://graph500.org + * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. + * @param b a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (vist https://graph500.org + * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. + * @param c a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (vist https://graph500.org + * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. + * @param seed Seed value for the random number generator. + * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part + * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to + * `false`). + * @param scramble_vertex_ids Flag controlling whether to scramble vertex ID bits (if set to `true`) + * or not (if set to `false`); scrambling vertx ID bits breaks correlation between vertex ID values + * and vertex degrees. 
The scramble code here follows the algorithm in the Graph 500 reference + * implementation version 3.0.0. + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. + */ +template +std::tuple, rmm::device_uvector> generate_rmat_edgelist( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor = 16, + double a = 0.57, + double b = 0.19, + double c = 0.19, + uint64_t seed = 0, + bool clip_and_flip = false, + bool scramble_vertex_ids = false); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/generate_rmat_edgelist.cu b/cpp/src/experimental/generate_rmat_edgelist.cu new file mode 100644 index 00000000000..0a6d666432f --- /dev/null +++ b/cpp/src/experimental/generate_rmat_edgelist.cu @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include + +namespace cugraph { +namespace experimental { + +template +std::tuple, rmm::device_uvector> generate_rmat_edgelist( + raft::handle_t const& handle, + size_t scale, + size_t num_edges, + double a, + double b, + double c, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids) +{ + CUGRAPH_EXPECTS(size_t{1} << scale <= std::numeric_limits::max(), + "Invalid input argument: scale too large for vertex_t."); + CUGRAPH_EXPECTS((a >= 0.0) && (b >= 0.0) && (c >= 0.0) && (a + b + c <= 1.0), + "Invalid input argument: a, b, c should be non-negative and a + b + c should not " + "be larger than 1.0."); + + raft::random::Rng rng(seed + 10); + // to limit memory footprint (1024 is a tuning parameter) + auto max_edges_to_generate_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * 1024; + rmm::device_uvector rands( + std::min(num_edges, max_edges_to_generate_per_iteration) * 2 * scale, handle.get_stream()); + + rmm::device_uvector srcs(num_edges, handle.get_stream()); + rmm::device_uvector dsts(num_edges, handle.get_stream()); + + size_t num_edges_generated{0}; + while (num_edges_generated < num_edges) { + auto num_edges_to_generate = + std::min(num_edges - num_edges_generated, max_edges_to_generate_per_iteration); + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin())) + + num_edges_generated; + rng.uniform( + rands.data(), num_edges_to_generate * 2 * scale, 0.0f, 1.0f, handle.get_stream()); + thrust::transform( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(num_edges_to_generate), + pair_first, + // if a + b == 0.0, a_norm is irrelevant, if (1.0 - (a+b)) == 0.0, c_norm is irrelevant + [scale, + clip_and_flip, + rands = rands.data(), + a_plus_b = a + b, + a_norm = (a + b) > 0.0 ? a / (a + b) : 0.0, + c_norm = (1.0 - (a + b)) > 0.0 ? 
c / (1.0 - (a + b)) : 0.0] __device__(auto i) { + vertex_t src{0}; + vertex_t dst{0}; + for (size_t bit = scale - 1; bit != 0; --bit) { + auto r0 = rands[i * 2 * scale + 2 * bit]; + auto r1 = rands[i * 2 * scale + 2 * bit + 1]; + auto src_bit_set = r0 > a_plus_b; + auto dst_bit_set = r1 > (src_bit_set ? c_norm : a_norm); + if (clip_and_flip) { + if (src == dst) { + if (!src_bit_set && dst_bit_set) { + src_bit_set = !src_bit_set; + dst_bit_set = !dst_bit_set; + } + } + } + src += src_bit_set ? static_cast(1 << bit) : 0; + dst += dst_bit_set ? static_cast(1 << bit) : 0; + } + return thrust::make_tuple(src, dst); + }); + num_edges_generated += num_edges_to_generate; + } + + if (scramble_vertex_ids) { + rands.resize(0, handle.get_stream()); + rands.shrink_to_fit(handle.get_stream()); + + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin())); + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first, + pair_first + srcs.size(), + pair_first, + [scale] __device__(auto pair) { + return thrust::make_tuple(detail::scramble(thrust::get<0>(pair), scale), + detail::scramble(thrust::get<1>(pair), scale)); + }); + } + + return std::make_tuple(std::move(srcs), std::move(dsts)); +} + +// explicit instantiation + +template std::tuple, rmm::device_uvector> +generate_rmat_edgelist(raft::handle_t const& handle, + size_t scale, + size_t num_edges, + double a, + double b, + double c, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + +template std::tuple, rmm::device_uvector> +generate_rmat_edgelist(raft::handle_t const& handle, + size_t scale, + size_t num_edges, + double a, + double b, + double c, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/scramble.cuh b/cpp/src/experimental/scramble.cuh new file mode 100644 index 00000000000..875bb5feff0 --- /dev/null +++ b/cpp/src/experimental/scramble.cuh @@ -0,0 +1,82 @@ +/* Copyright (C) 2009-2010 The Trustees of Indiana University. */ +/* */ +/* Use, modification and distribution is subject to the Boost Software */ +/* License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at */ +/* http://www.boost.org/LICENSE_1_0.txt) */ +/* */ +/* Authors: Jeremiah Willcock */ +/* Andrew Lumsdaine */ + +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include + +namespace cugraph { +namespace experimental { +namespace detail { + +template +__device__ std::enable_if_t bitreversal(uvertex_t value) +{ + return __brevll(value); +} + +template +__device__ std::enable_if_t bitreversal(uvertex_t value) +{ + return __brev(value); +} + +template +__device__ std::enable_if_t bitreversal(uvertex_t value) +{ + return static_cast(__brev(value) >> 16); +} + +/* Apply a permutation to scramble vertex numbers; a randomly generated + * permutation is not used because applying it at scale is too expensive. */ +template +__device__ vertex_t scramble(vertex_t value, size_t lgN) +{ + constexpr size_t number_of_bits = sizeof(vertex_t) * 8; + + static_assert((number_of_bits == 64) || (number_of_bits == 32) || (number_of_bits == 16)); + assert((std::is_unsigned::value && lgN <= number_of_bits) || + (!std::is_unsigned::value && lgN < number_of_bits)); + assert(value >= 0); + + using uvertex_t = typename std::make_unsigned::type; + + constexpr auto scramble_value0 = static_cast( + sizeof(vertex_t) == 8 ? 606610977102444280 : (sizeof(vertex_t) == 4 ? 282475248 : 0)); + constexpr auto scramble_value1 = static_cast( + sizeof(vertex_t) == 8 ? 11680327234415193037 : (sizeof(vertex_t) == 4 ? 2617694917 : 8620)); + + auto v = static_cast(value); + v += scramble_value0 + scramble_value1; + v *= (scramble_value0 | static_cast(0x4519840211493211)); + v = bitreversal(v) >> (number_of_bits - lgN); + v *= (scramble_value1 | static_cast(0x3050852102C843A5)); + v = bitreversal(v) >> (number_of_bits - lgN); + return static_cast(v); +} + +} // namespace detail +} // namespace experimental +} // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index a93aa0cfabb..18dfdbc8f63 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -317,6 +317,14 @@ set(MST_TEST_SRC ConfigureTest(MST_TEST "${MST_TEST_SRC}") +################################################################################################### +# - Experimental R-mat graph generation tests ----------------------------------------------------- + +set(EXPERIMENTAL_GENERATE_RMAT_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/generate_rmat_test.cpp") + +ConfigureTest(EXPERIMENTAL_GENERATE_RMAT_TEST "${EXPERIMENTAL_GENERATE_RMAT_TEST_SRCS}" "") + ################################################################################################### # - Experimental Graph tests ---------------------------------------------------------------------- @@ -329,8 +337,6 @@ ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}") # - Experimental weight-sum tests ----------------------------------------------------------------- set(EXPERIMENTAL_WEIGHT_SUM_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/weight_sum_test.cpp") ConfigureTest(EXPERIMENTAL_WEIGHT_SUM_TEST "${EXPERIMENTAL_WEIGHT_SUM_TEST_SRCS}") @@ -339,8 +345,6 @@ ConfigureTest(EXPERIMENTAL_WEIGHT_SUM_TEST "${EXPERIMENTAL_WEIGHT_SUM_TEST_SRCS} # - Experimental degree tests --------------------------------------------------------------------- set(EXPERIMENTAL_DEGREE_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/degree_test.cpp") ConfigureTest(EXPERIMENTAL_DEGREE_TEST "${EXPERIMENTAL_DEGREE_TEST_SRCS}") diff --git 
a/cpp/tests/experimental/coarsen_graph_test.cpp b/cpp/tests/experimental/coarsen_graph_test.cpp index 941b33e5661..789619f2cd9 100644 --- a/cpp/tests/experimental/coarsen_graph_test.cpp +++ b/cpp/tests/experimental/coarsen_graph_test.cpp @@ -36,20 +36,6 @@ #include #include -template -std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, - vertex_t v) -{ - return (v >= 0) && (v < num_vertices); -} - -template -std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, - vertex_t v) -{ - return v < num_vertices; -} - template void check_coarsened_graph_results(edge_t* org_offsets, vertex_t* org_indices, @@ -68,13 +54,13 @@ void check_coarsened_graph_results(edge_t* org_offsets, ASSERT_TRUE(std::count_if(org_indices, org_indices + org_offsets[num_org_vertices], [num_org_vertices](auto nbr) { - return !is_valid_vertex(num_org_vertices, nbr); + return !cugraph::test::is_valid_vertex(num_org_vertices, nbr); }) == 0); ASSERT_TRUE(std::is_sorted(coarse_offsets, coarse_offsets + num_coarse_vertices)); ASSERT_TRUE(std::count_if(coarse_indices, coarse_indices + coarse_offsets[num_coarse_vertices], [num_coarse_vertices](auto nbr) { - return !is_valid_vertex(num_coarse_vertices, nbr); + return !cugraph::test::is_valid_vertex(num_coarse_vertices, nbr); }) == 0); ASSERT_TRUE(num_coarse_vertices <= num_org_vertices); diff --git a/cpp/tests/experimental/degree_test.cpp b/cpp/tests/experimental/degree_test.cpp index 7c7b41cdacc..581b6b29f64 100644 --- a/cpp/tests/experimental/degree_test.cpp +++ b/cpp/tests/experimental/degree_test.cpp @@ -83,9 +83,11 @@ class Tests_Degree : public ::testing::TestWithParam { { raft::handle_t handle{}; - auto graph = cugraph::test:: - read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, false); + cugraph::experimental::graph_t graph( + handle); + std::tie(graph, std::ignore) = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, false, false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); diff --git a/cpp/tests/experimental/generate_rmat_test.cpp b/cpp/tests/experimental/generate_rmat_test.cpp new file mode 100644 index 00000000000..249a1a3c6c8 --- /dev/null +++ b/cpp/tests/experimental/generate_rmat_test.cpp @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include + +// this function assumes that vertex IDs are not scrambled +template +void validate_rmat_distribution( + std::tuple* edges, + size_t num_edges, + vertex_t src_first, + vertex_t src_last, + vertex_t dst_first, + vertex_t dst_last, + double a, + double b, + double c, + bool clip_and_flip, + size_t min_edges /* stop recursion if # edges < min_edges */, + double error_tolerance /* (computed a|b|c - input a|b|c) shoud be smaller than error_tolerance*/) +{ + // we cannot expect the ratios of the edges in the four quadrants of the graph adjacency matrix to + // converge close to a, b, c, d if num_edges is not large enough. + if (num_edges < min_edges) { return; } + + auto src_threshold = (src_first + src_last) / 2; + auto dst_threshold = (dst_first + dst_last) / 2; + + auto a_plus_b_last = std::partition(edges, edges + num_edges, [src_threshold](auto edge) { + return std::get<0>(edge) < src_threshold; + }); + auto a_last = std::partition( + edges, a_plus_b_last, [dst_threshold](auto edge) { return std::get<1>(edge) < dst_threshold; }); + auto c_last = std::partition(a_plus_b_last, edges + num_edges, [dst_threshold](auto edge) { + return std::get<1>(edge) < dst_threshold; + }); + + ASSERT_TRUE(std::abs((double)std::distance(edges, a_last) / num_edges - a) < error_tolerance) + << "# edges=" << num_edges << " computed a=" << (double)std::distance(edges, a_last) / num_edges + << " iput a=" << a << " error tolerance=" << error_tolerance << "."; + if (clip_and_flip && (src_first == dst_first) && + (src_last == dst_last)) { // if clip_and_flip and in the diagonal + ASSERT_TRUE(std::distance(a_last, a_plus_b_last) == 0); + ASSERT_TRUE(std::abs((double)std::distance(a_plus_b_last, c_last) / num_edges - (b + c)) < + error_tolerance) + << "# edges=" << num_edges + << " computed c=" << (double)std::distance(a_plus_b_last, c_last) / num_edges + << " iput (b + c)=" << (b + c) << " error tolerance=" << error_tolerance << "."; + } else { + ASSERT_TRUE(std::abs((double)std::distance(a_last, a_plus_b_last) / num_edges - b) < + error_tolerance) + << "# edges=" << num_edges + << " computed b=" << (double)std::distance(a_last, a_plus_b_last) / num_edges + << " iput b=" << b << " error tolerance=" << error_tolerance << "."; + ASSERT_TRUE(std::abs((double)std::distance(a_plus_b_last, c_last) / num_edges - c) < + error_tolerance) + << "# edges=" << num_edges + << " computed c=" << (double)std::distance(a_plus_b_last, c_last) / num_edges + << " iput c=" << c << " error tolerance=" << error_tolerance << "."; + } + + validate_rmat_distribution(edges, + std::distance(edges, a_last), + src_first, + src_threshold, + dst_first, + dst_threshold, + a, + b, + c, + clip_and_flip, + min_edges, + error_tolerance); + validate_rmat_distribution(a_last, + std::distance(a_last, a_plus_b_last), + src_first, + (src_first + src_last) / 2, + dst_threshold, + dst_last, + a, + b, + c, + clip_and_flip, + min_edges, + error_tolerance); + validate_rmat_distribution(a_plus_b_last, + std::distance(a_plus_b_last, c_last), + src_threshold, + src_last, + dst_first, + dst_threshold, + a, + b, + c, + clip_and_flip, + min_edges, + error_tolerance); + validate_rmat_distribution(c_last, + std::distance(c_last, edges + num_edges), + src_threshold, + src_last, + dst_threshold, + dst_last, + a, + b, + c, + clip_and_flip, + min_edges, + error_tolerance); + + return; +} + +typedef struct GenerateRmat_Usecase_t { + size_t scale{0}; + 
size_t edge_factor{0}; + double a{0.0}; + double b{0.0}; + double c{0.0}; + bool clip_and_flip{false}; + + GenerateRmat_Usecase_t( + size_t scale, size_t edge_factor, double a, double b, double c, bool clip_and_flip) + : scale(scale), edge_factor(edge_factor), a(a), b(b), c(c), clip_and_flip(clip_and_flip){}; +} GenerateRmat_Usecase; + +class Tests_GenerateRmat : public ::testing::TestWithParam { + public: + Tests_GenerateRmat() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(GenerateRmat_Usecase const& configuration) + { + raft::handle_t handle{}; + + auto num_vertices = static_cast(size_t{1} << configuration.scale); + std::vector no_scramble_out_degrees(num_vertices, 0); + std::vector no_scramble_in_degrees(num_vertices, 0); + std::vector scramble_out_degrees(num_vertices, 0); + std::vector scramble_in_degrees(num_vertices, 0); + for (size_t scramble = 0; scramble < 2; ++scramble) { + rmm::device_uvector d_srcs(0, handle.get_stream()); + rmm::device_uvector d_dsts(0, handle.get_stream()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::tie(d_srcs, d_dsts) = cugraph::experimental::generate_rmat_edgelist( + handle, + configuration.scale, + (size_t{1} << configuration.scale) * configuration.edge_factor, + configuration.a, + configuration.b, + configuration.c, + uint64_t{0}, + configuration.clip_and_flip, + static_cast(scramble)); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::vector h_cugraph_srcs(d_srcs.size()); + std::vector h_cugraph_dsts(d_dsts.size()); + + raft::update_host(h_cugraph_srcs.data(), d_srcs.data(), d_srcs.size(), handle.get_stream()); + raft::update_host(h_cugraph_dsts.data(), d_dsts.data(), d_dsts.size(), handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + ASSERT_TRUE( + (h_cugraph_srcs.size() == (size_t{1} << configuration.scale) * configuration.edge_factor) && + (h_cugraph_dsts.size() == (size_t{1} << configuration.scale) * configuration.edge_factor)) + << "Returned an invalid number of R-mat graph edges."; + ASSERT_TRUE( + std::count_if(h_cugraph_srcs.begin(), + h_cugraph_srcs.end(), + [num_vertices = static_cast(size_t{1} << configuration.scale)]( + auto v) { return !cugraph::test::is_valid_vertex(num_vertices, v); }) == 0) + << "Returned R-mat graph edges have invalid source vertex IDs."; + ASSERT_TRUE( + std::count_if(h_cugraph_dsts.begin(), + h_cugraph_dsts.end(), + [num_vertices = static_cast(size_t{1} << configuration.scale)]( + auto v) { return !cugraph::test::is_valid_vertex(num_vertices, v); }) == 0) + << "Returned R-mat graph edges have invalid destination vertex IDs."; + + if (!scramble) { + if (configuration.clip_and_flip) { + for (size_t i = 0; i < h_cugraph_srcs.size(); ++i) { + ASSERT_TRUE(h_cugraph_srcs[i] >= h_cugraph_dsts[i]); + } + } + + std::vector> h_cugraph_edges(h_cugraph_srcs.size()); + for (size_t i = 0; i < h_cugraph_srcs.size(); ++i) { + h_cugraph_edges[i] = std::make_tuple(h_cugraph_srcs[i], h_cugraph_dsts[i]); + } + + validate_rmat_distribution(h_cugraph_edges.data(), + h_cugraph_edges.size(), + vertex_t{0}, + num_vertices, + vertex_t{0}, + num_vertices, + configuration.a, + configuration.b, + configuration.c, + configuration.clip_and_flip, + size_t{100000}, + 0.01); + } + + if (scramble) { + std::for_each(h_cugraph_srcs.begin(), + h_cugraph_srcs.end(), + [&scramble_out_degrees](auto src) { 
scramble_out_degrees[src]++; }); + std::for_each(h_cugraph_dsts.begin(), + h_cugraph_dsts.end(), + [&scramble_in_degrees](auto dst) { scramble_in_degrees[dst]++; }); + std::sort(scramble_out_degrees.begin(), scramble_out_degrees.end()); + std::sort(scramble_in_degrees.begin(), scramble_in_degrees.end()); + } else { + std::for_each(h_cugraph_srcs.begin(), + h_cugraph_srcs.end(), + [&no_scramble_out_degrees](auto src) { no_scramble_out_degrees[src]++; }); + std::for_each(h_cugraph_dsts.begin(), + h_cugraph_dsts.end(), + [&no_scramble_in_degrees](auto dst) { no_scramble_in_degrees[dst]++; }); + std::sort(no_scramble_out_degrees.begin(), no_scramble_out_degrees.end()); + std::sort(no_scramble_in_degrees.begin(), no_scramble_in_degrees.end()); + } + } + + // this relies on the fact that the edge generator is deterministic. + // ideally, we should test that the two graphs are isomorphic, but this is NP hard; insted, we + // just check out-degree & in-degree distributions + ASSERT_TRUE(std::equal(no_scramble_out_degrees.begin(), + no_scramble_out_degrees.end(), + scramble_out_degrees.begin())); + ASSERT_TRUE(std::equal( + no_scramble_in_degrees.begin(), no_scramble_in_degrees.end(), scramble_in_degrees.begin())); + } +}; + +// FIXME: add tests for type combinations + +TEST_P(Tests_GenerateRmat, CheckInt32) { run_current_test(GetParam()); } + +INSTANTIATE_TEST_CASE_P(simple_test, + Tests_GenerateRmat, + ::testing::Values(GenerateRmat_Usecase(20, 16, 0.57, 0.19, 0.19, true), + GenerateRmat_Usecase(20, 16, 0.57, 0.19, 0.19, false), + GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, true), + GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, false))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/weight_sum_test.cpp b/cpp/tests/experimental/weight_sum_test.cpp index aeda7386314..9ab47b69baa 100644 --- a/cpp/tests/experimental/weight_sum_test.cpp +++ b/cpp/tests/experimental/weight_sum_test.cpp @@ -85,9 +85,11 @@ class Tests_WeightSum : public ::testing::TestWithParam { { raft::handle_t handle{}; - auto graph = cugraph::test:: - read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, true); + cugraph::experimental::graph_t graph( + handle); + std::tie(graph, std::ignore) = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, true, false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 4b5517271f5..4682699df2d 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -130,5 +130,19 @@ read_graph_from_matrix_market_file(raft::handle_t const& handle, bool test_weighted, bool renumber); +template +std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, + vertex_t v) +{ + return (v >= 0) && (v < num_vertices); +} + +template +std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, + vertex_t v) +{ + return v < num_vertices; +} + } // namespace test } // namespace cugraph From 79745021553f227b2ed8a5a3508ddb7bda939c54 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Wed, 3 Mar 2021 12:28:11 -0500 Subject: [PATCH 11/51] =?UTF-8?q?update=20default=20path=20of=20setup=20to?= =?UTF-8?q?=20use=20the=20new=20directory=20paths=20in=20build=20=E2=80=A6?= =?UTF-8?q?=20(#1425)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit A recent change caused the `setup.py` running in a clean checkout to fail unless either `RAFT_PATH` or `CUGRAPH_BUILD_PATH` were set. These are typically set if you run the build scripts, but if you try running the build steps individually by hand the script would fail unless you set one of these environment variables correctly. This PR fixes the default path (if neither environment variable is specified) to construct the proper location for looking up raft. Closes #1428 Authors: - Chuck Hastings (@ChuckHastings) Approvers: - Rick Ratzel (@rlratzel) - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1425 --- python/setuputils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/setuputils.py b/python/setuputils.py index 47eaf74d4b6..d93051d05ef 100644 --- a/python/setuputils.py +++ b/python/setuputils.py @@ -152,7 +152,7 @@ def clone_repo_if_needed(name, cpp_build_path=None, repo_path = ( _get_repo_path() + '/python/_external_repositories/' + name + '/') else: - repo_path = os.path.join(cpp_build_path, name + '/src/' + name + '/') + repo_path = os.path.join(cpp_build_path, '_deps', name + '-src') return repo_path, repo_cloned From c1047ed79525d7ff7a5d484e19aaa048271c45da Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 4 Mar 2021 09:05:32 -0500 Subject: [PATCH 12/51] Replace rmm::device_vector & thrust::host_vector with rmm::device_uvector & std::vector, respectively. (#1421) - [x] Replace rmm::device_vector with rmm::device_uvector for better concurrency in multi-stream executions - [x] Replace thrust::host_vector with std::vector This PR partially addresses https://github.com/rapidsai/cugraph/issues/1390 Authors: - Seunghwa Kang (@seunghwak) Approvers: - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1421 --- cpp/include/compute_partition.cuh | 26 ++++++---- cpp/include/patterns/count_if_e.cuh | 4 +- cpp/include/patterns/transform_reduce_e.cuh | 11 ++-- .../update_frontier_v_push_if_out_nbr.cuh | 26 +++++----- cpp/include/patterns/vertex_frontier.cuh | 51 ++++++++++++------- cpp/src/experimental/louvain.cuh | 2 +- 6 files changed, 72 insertions(+), 48 deletions(-) diff --git a/cpp/include/compute_partition.cuh b/cpp/include/compute_partition.cuh index c81a6237b31..5c03b0971f2 100644 --- a/cpp/include/compute_partition.cuh +++ b/cpp/include/compute_partition.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
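The hunks in this file, and the ones that follow, all apply the same substitution. As a minimal sketch of the resulting pattern (the names d_values, h_values, n, and device_uvector_pattern below are illustrative only, not taken from the patch): rmm::device_uvector is constructed on an explicit stream and leaves its elements uninitialized, so initialization and host/device transfers become explicit, using the thrust::fill, raft::update_device / raft::update_host, and stream-synchronize calls already used elsewhere in this patch.

    // Sketch only; illustrative names, header locations assumed to match the
    // RAFT/RMM snapshot this patch builds against.
    #include <raft/handle.hpp>
    #include <raft/cudart_utils.h>          // raft::update_device / raft::update_host
    #include <rmm/device_uvector.hpp>
    #include <rmm/thrust_rmm_allocator.h>   // rmm::exec_policy
    #include <thrust/fill.h>
    #include <vector>

    void device_uvector_pattern(raft::handle_t const& handle, size_t n)
    {
      // stream-ordered allocation; unlike rmm::device_vector, elements are NOT value-initialized
      rmm::device_uvector<int> d_values(n, handle.get_stream());
      // explicit initialization replaces device_vector's implicit zero-fill
      thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
                   d_values.begin(), d_values.end(), int{0});
      // std::vector plus explicit copies replaces thrust::host_vector
      std::vector<int> h_values(n, 0);
      raft::update_device(d_values.data(), h_values.data(), n, handle.get_stream());
      raft::update_host(h_values.data(), d_values.data(), n, handle.get_stream());
      // CUDA_TRY is the CUDA error-checking macro used elsewhere in this patch;
      // synchronize before the host reads h_values
      CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
    }

Keeping the allocation, the fill, and the copies ordered on handle.get_stream() is what enables the multi-stream concurrency mentioned in the commit message; nothing in the pattern falls back to the default stream.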
@@ -39,27 +39,32 @@ class compute_partition_t { using graph_view_t = graph_view_type; using vertex_t = typename graph_view_type::vertex_type; - compute_partition_t(graph_view_t const &graph_view) + compute_partition_t(raft::handle_t const &handle, graph_view_t const &graph_view) + : vertex_partition_offsets_v_(0, handle.get_stream()) { - init(graph_view); + init(handle, graph_view); } private: template * = nullptr> - void init(graph_view_t const &graph_view) + void init(raft::handle_t const &handle, graph_view_t const &graph_view) { } template * = nullptr> - void init(graph_view_t const &graph_view) + void init(raft::handle_t const &handle, graph_view_t const &graph_view) { auto partition = graph_view.get_partition(); row_size_ = partition.get_row_size(); col_size_ = partition.get_col_size(); size_ = row_size_ * col_size_; - vertex_partition_offsets_v_.resize(size_ + 1); - vertex_partition_offsets_v_ = partition.get_vertex_partition_offsets(); + vertex_partition_offsets_v_.resize(size_ + 1, handle.get_stream()); + auto vertex_partition_offsets = partition.get_vertex_partition_offsets(); + raft::update_device(vertex_partition_offsets_v_.data(), + vertex_partition_offsets.data(), + vertex_partition_offsets.size(), + handle.get_stream()); } public: @@ -166,7 +171,7 @@ class compute_partition_t { */ vertex_device_view_t vertex_device_view() const { - return vertex_device_view_t(vertex_partition_offsets_v_.data().get(), size_); + return vertex_device_view_t(vertex_partition_offsets_v_.data(), size_); } /** @@ -176,12 +181,11 @@ class compute_partition_t { */ edge_device_view_t edge_device_view() const { - return edge_device_view_t( - vertex_partition_offsets_v_.data().get(), row_size_, col_size_, size_); + return edge_device_view_t(vertex_partition_offsets_v_.data(), row_size_, col_size_, size_); } private: - rmm::device_vector vertex_partition_offsets_v_{}; + rmm::device_uvector vertex_partition_offsets_v_; int row_size_{1}; int col_size_{1}; int size_{1}; diff --git a/cpp/include/patterns/count_if_e.cuh b/cpp/include/patterns/count_if_e.cuh index 63b31f9c44e..99bfc80f643 100644 --- a/cpp/include/patterns/count_if_e.cuh +++ b/cpp/include/patterns/count_if_e.cuh @@ -201,7 +201,7 @@ typename GraphViewType::edge_type count_if_e( detail::count_if_e_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - rmm::device_vector block_counts(update_grid.num_blocks); + rmm::device_uvector block_counts(update_grid.num_blocks, handle.get_stream()); detail::for_all_major_for_all_nbr_low_degree<< block_results(update_grid.num_blocks); + auto block_result_buffer = + allocate_dataframe_buffer(update_grid.num_blocks, handle.get_stream()); detail::for_all_major_for_all_nbr_low_degree<<(block_result_buffer), e_op); // FIXME: we have several options to implement this. 
With cooperative group support @@ -225,10 +226,10 @@ T transform_reduce_e(raft::handle_t const& handle, // synchronization point in varying timings and the number of SMs is not very big) auto partial_result = thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - block_results.begin(), - block_results.end(), + get_dataframe_buffer_begin(block_result_buffer), + get_dataframe_buffer_begin(block_result_buffer) + update_grid.num_blocks, T(), - [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); }); + [] __device__(T lhs, T rhs) { return plus_edge_op_result(lhs, rhs); }); result = plus_edge_op_result(result, partial_result); } diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh index 4c76322fa79..4efd32bcac7 100644 --- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -157,13 +158,14 @@ size_t reduce_buffer_elements(raft::handle_t const& handle, // FIXME: if GraphViewType::is_multi_gpu is true, this should be executed on the GPU holding the // vertex unless reduce_op is a pure function. rmm::device_uvector keys(num_buffer_elements, handle.get_stream()); - rmm::device_vector values(num_buffer_elements); + auto value_buffer = + allocate_dataframe_buffer(num_buffer_elements, handle.get_stream()); auto it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), buffer_key_output_first, buffer_key_output_first + num_buffer_elements, buffer_payload_output_first, keys.begin(), - values.begin(), + get_dataframe_buffer_begin(value_buffer), thrust::equal_to(), reduce_op); auto num_reduced_buffer_elements = @@ -173,13 +175,9 @@ size_t reduce_buffer_elements(raft::handle_t const& handle, keys.begin() + num_reduced_buffer_elements, buffer_key_output_first); thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - values.begin(), - values.begin() + num_reduced_buffer_elements, + get_dataframe_buffer_begin(value_buffer), + get_dataframe_buffer_begin(value_buffer) + num_reduced_buffer_elements, buffer_payload_output_first); - // FIXME: this is unecessary if we use a tuple of rmm::device_uvector objects for values - CUDA_TRY( - cudaStreamSynchronize(handle.get_stream())); // this is necessary as values will become - // out-of-scope once this function returns return num_reduced_buffer_elements; } } @@ -673,15 +671,19 @@ void update_frontier_v_push_if_out_nbr( num_buffer_elements, vertex_value_input_first, vertex_value_output_first, - std::get<0>(bucket_and_bucket_size_device_ptrs).get(), - std::get<1>(bucket_and_bucket_size_device_ptrs).get(), + std::get<0>(bucket_and_bucket_size_device_ptrs), + std::get<1>(bucket_and_bucket_size_device_ptrs), VertexFrontierType::kInvalidBucketIdx, invalid_vertex, v_op); auto bucket_sizes_device_ptr = std::get<1>(bucket_and_bucket_size_device_ptrs); - thrust::host_vector bucket_sizes( - bucket_sizes_device_ptr, bucket_sizes_device_ptr + VertexFrontierType::kNumBuckets); + std::vector bucket_sizes(VertexFrontierType::kNumBuckets); + raft::update_host(bucket_sizes.data(), + bucket_sizes_device_ptr, + VertexFrontierType::kNumBuckets, + handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); for (size_t i = 0; i < VertexFrontierType::kNumBuckets; ++i) { vertex_frontier.get_bucket(i).set_size(bucket_sizes[i]); } diff --git 
a/cpp/include/patterns/vertex_frontier.cuh b/cpp/include/patterns/vertex_frontier.cuh index 2126a27ee5a..c11142d3cf7 100644 --- a/cpp/include/patterns/vertex_frontier.cuh +++ b/cpp/include/patterns/vertex_frontier.cuh @@ -147,13 +147,17 @@ template class Bucket { public: Bucket(raft::handle_t const& handle, size_t capacity) - : handle_ptr_(&handle), elements_(capacity, invalid_vertex_id::value) + : handle_ptr_(&handle), elements_(capacity, handle.get_stream()) { + thrust::fill(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + elements_.begin(), + elements_.end(), + invalid_vertex_id::value); } void insert(vertex_t v) { - elements_[size_] = v; + raft::update_device(elements_.data() + size_, &v, 1, handle_ptr_->get_stream()); ++size_; } @@ -177,9 +181,9 @@ class Bucket { size_t capacity() const { return elements_.size(); } - auto const data() const { return elements_.data().get(); } + auto const data() const { return elements_.data(); } - auto data() { return elements_.data().get(); } + auto data() { return elements_.data(); } auto const begin() const { return elements_.begin(); } @@ -191,7 +195,7 @@ class Bucket { private: raft::handle_t const* handle_ptr_{nullptr}; - rmm::device_vector elements_{}; + rmm::device_uvector elements_; size_t size_{0}; }; @@ -206,13 +210,21 @@ class VertexFrontier { VertexFrontier(raft::handle_t const& handle, std::vector bucket_capacities) : handle_ptr_(&handle), - tmp_bucket_ptrs_(num_buckets, nullptr), - tmp_bucket_sizes_(num_buckets, 0), + tmp_bucket_ptrs_(num_buckets, handle.get_stream()), + tmp_bucket_sizes_(num_buckets, handle.get_stream()), buffer_ptrs_(kReduceInputTupleSize + 1 /* to store destination column number */, nullptr), buffer_idx_(0, handle_ptr_->get_stream()) { CUGRAPH_EXPECTS(bucket_capacities.size() == num_buckets, "invalid input argument bucket_capacities (size mismatch)"); + thrust::fill(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + tmp_bucket_ptrs_.begin(), + tmp_bucket_ptrs_.end(), + static_cast(nullptr)); + thrust::fill(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + tmp_bucket_sizes_.begin(), + tmp_bucket_sizes_.end(), + size_t{0}); for (size_t i = 0; i < num_buckets; ++i) { buckets_.emplace_back(handle, bucket_capacities[i]); } @@ -251,8 +263,8 @@ class VertexFrontier { 0, handle_ptr_->get_stream()>>>(this_bucket.begin(), this_bucket.end(), - std::get<0>(bucket_and_bucket_size_device_ptrs).get(), - std::get<1>(bucket_and_bucket_size_device_ptrs).get(), + std::get<0>(bucket_and_bucket_size_device_ptrs), + std::get<1>(bucket_and_bucket_size_device_ptrs), bucket_idx, kInvalidBucketIdx, invalid_vertex, @@ -269,8 +281,10 @@ class VertexFrontier { [] __device__(auto value) { return value == invalid_vertex; }); auto bucket_sizes_device_ptr = std::get<1>(bucket_and_bucket_size_device_ptrs); - thrust::host_vector bucket_sizes(bucket_sizes_device_ptr, - bucket_sizes_device_ptr + kNumBuckets); + std::vector bucket_sizes(kNumBuckets); + raft::update_host( + bucket_sizes.data(), bucket_sizes_device_ptr, kNumBuckets, handle_ptr_->get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle_ptr_->get_stream())); for (size_t i = 0; i < kNumBuckets; ++i) { if (i != bucket_idx) { get_bucket(i).set_size(bucket_sizes[i]); } } @@ -283,14 +297,17 @@ class VertexFrontier { auto get_bucket_and_bucket_size_device_pointers() { - thrust::host_vector tmp_ptrs(buckets_.size(), nullptr); - thrust::host_vector tmp_sizes(buckets_.size(), 0); + std::vector tmp_ptrs(buckets_.size(), 
nullptr); + std::vector tmp_sizes(buckets_.size(), 0); for (size_t i = 0; i < buckets_.size(); ++i) { tmp_ptrs[i] = get_bucket(i).data(); tmp_sizes[i] = get_bucket(i).size(); } - tmp_bucket_ptrs_ = tmp_ptrs; - tmp_bucket_sizes_ = tmp_sizes; + raft::update_device( + tmp_bucket_ptrs_.data(), tmp_ptrs.data(), tmp_ptrs.size(), handle_ptr_->get_stream()); + raft::update_device( + tmp_bucket_sizes_.data(), tmp_sizes.data(), tmp_sizes.size(), handle_ptr_->get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle_ptr_->get_stream())); return std::make_tuple(tmp_bucket_ptrs_.data(), tmp_bucket_sizes_.data()); } @@ -345,8 +362,8 @@ class VertexFrontier { raft::handle_t const* handle_ptr_{nullptr}; std::vector> buckets_{}; - rmm::device_vector tmp_bucket_ptrs_{}; - rmm::device_vector tmp_bucket_sizes_{}; + rmm::device_uvector tmp_bucket_ptrs_; + rmm::device_uvector tmp_bucket_sizes_; std::array tuple_element_sizes_ = compute_thrust_tuple_element_sizes()(); diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index f162cd17a61..fe8310a62ca 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -405,7 +405,7 @@ class Louvain { handle_(handle), dendrogram_(std::make_unique>()), current_graph_view_(graph_view), - compute_partition_(graph_view), + compute_partition_(handle, graph_view), local_num_vertices_(graph_view.get_number_of_local_vertices()), local_num_rows_(graph_view.get_number_of_local_adj_matrix_partition_rows()), local_num_cols_(graph_view.get_number_of_local_adj_matrix_partition_cols()), From 65f2f5b6d3f6c9c0ed88369175f9f14001724b85 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Thu, 4 Mar 2021 11:20:25 -0600 Subject: [PATCH 13/51] Remove raft handle duplication (#1436) Closes #1407 Authors: - @Iroy30 Approvers: - Alex Fender (@afender) - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1436 --- .../betweenness_centrality_wrapper.pyx | 8 +- .../edge_betweenness_centrality_wrapper.pyx | 4 +- python/cugraph/centrality/katz_centrality.pxd | 4 +- .../centrality/katz_centrality_wrapper.pyx | 7 +- python/cugraph/comms/comms.pxd | 4 +- python/cugraph/comms/comms_wrapper.pyx | 20 +- python/cugraph/community/egonet.pxd | 2 +- python/cugraph/community/egonet_wrapper.pyx | 4 +- .../community/ktruss_subgraph_wrapper.pyx | 8 +- python/cugraph/community/leiden_wrapper.pyx | 3 +- python/cugraph/community/louvain.pxd | 4 +- python/cugraph/community/louvain_wrapper.pyx | 5 +- .../community/spectral_clustering_wrapper.pyx | 5 +- .../community/subgraph_extraction_wrapper.pyx | 2 - .../community/triangle_count_wrapper.pyx | 4 +- .../components/connectivity_wrapper.pyx | 3 +- python/cugraph/cores/core_number_wrapper.pyx | 4 +- python/cugraph/cores/k_core_wrapper.pyx | 7 +- .../dask/centrality/mg_katz_centrality.pxd | 4 +- .../centrality/mg_katz_centrality_wrapper.pyx | 4 +- python/cugraph/dask/community/louvain.pxd | 4 +- .../dask/community/louvain_wrapper.pyx | 5 +- .../dask/link_analysis/mg_pagerank.pxd | 6 +- .../link_analysis/mg_pagerank_wrapper.pyx | 4 +- .../dask/structure/renumber_wrapper.pyx | 2 +- python/cugraph/dask/traversal/mg_bfs.pxd | 4 +- .../cugraph/dask/traversal/mg_bfs_wrapper.pyx | 4 +- python/cugraph/dask/traversal/mg_sssp.pxd | 4 +- .../dask/traversal/mg_sssp_wrapper.pyx | 4 +- .../cugraph/layout/force_atlas2_wrapper.pyx | 6 +- .../cugraph/linear_assignment/lap_wrapper.pyx | 3 +- python/cugraph/link_analysis/hits_wrapper.pyx | 7 +- python/cugraph/link_analysis/pagerank.pxd 
| 4 +- .../link_analysis/pagerank_wrapper.pyx | 5 +- .../link_prediction/jaccard_wrapper.pyx | 4 +- .../link_prediction/overlap_wrapper.pyx | 4 +- python/cugraph/structure/graph_primtypes.pxd | 153 +--------------- python/cugraph/structure/graph_utilities.pxd | 173 ++++++++++++++++++ python/cugraph/structure/utils.pxd | 5 +- python/cugraph/structure/utils_wrapper.pyx | 3 +- python/cugraph/traversal/bfs.pxd | 4 +- python/cugraph/traversal/bfs_wrapper.pyx | 7 +- python/cugraph/traversal/sssp.pxd | 4 +- python/cugraph/traversal/sssp_wrapper.pyx | 9 +- .../traveling_salesperson_wrapper.pyx | 1 - 45 files changed, 255 insertions(+), 280 deletions(-) create mode 100644 python/cugraph/structure/graph_utilities.pxd diff --git a/python/cugraph/centrality/betweenness_centrality_wrapper.pyx b/python/cugraph/centrality/betweenness_centrality_wrapper.pyx index e3d6e04006f..855de3327ba 100644 --- a/python/cugraph/centrality/betweenness_centrality_wrapper.pyx +++ b/python/cugraph/centrality/betweenness_centrality_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,18 +17,12 @@ # cython: language_level = 3 from cugraph.centrality.betweenness_centrality cimport betweenness_centrality as c_betweenness_centrality -from cugraph.centrality.betweenness_centrality cimport handle_t from cugraph.structure.graph import DiGraph from cugraph.structure.graph_primtypes cimport * from libc.stdint cimport uintptr_t from libcpp cimport bool import cudf import numpy as np -import numpy.ctypeslib as ctypeslib - -import dask_cudf -import dask_cuda - import cugraph.comms.comms as Comms from cugraph.dask.common.mg_utils import get_client import dask.distributed diff --git a/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx b/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx index 3c14d590750..136bde1b0e3 100644 --- a/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx +++ b/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -24,8 +24,6 @@ from libc.stdint cimport uintptr_t from libcpp cimport bool import cudf import numpy as np -import numpy.ctypeslib as ctypeslib - from cugraph.dask.common.mg_utils import get_client import cugraph.comms.comms as Comms import dask.distributed diff --git a/python/cugraph/centrality/katz_centrality.pxd b/python/cugraph/centrality/katz_centrality.pxd index ebf94c78263..ce9ab5291f6 100644 --- a/python/cugraph/centrality/katz_centrality.pxd +++ b/python/cugraph/centrality/katz_centrality.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": diff --git a/python/cugraph/centrality/katz_centrality_wrapper.pyx b/python/cugraph/centrality/katz_centrality_wrapper.pyx index 088042395fd..d38a0b82824 100644 --- a/python/cugraph/centrality/katz_centrality_wrapper.pyx +++ b/python/cugraph/centrality/katz_centrality_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,13 +17,10 @@ # cython: language_level = 3 from cugraph.centrality.katz_centrality cimport call_katz_centrality -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from cugraph.structure import graph_primtypes_wrapper -from libcpp cimport bool from libc.stdint cimport uintptr_t - import cudf -import rmm import numpy as np diff --git a/python/cugraph/comms/comms.pxd b/python/cugraph/comms/comms.pxd index 44f7ee77562..3984ade9a9c 100644 --- a/python/cugraph/comms/comms.pxd +++ b/python/cugraph/comms/comms.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_primtypes cimport handle_t +from cugraph.raft.common.handle cimport * cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": diff --git a/python/cugraph/comms/comms_wrapper.pyx b/python/cugraph/comms/comms_wrapper.pyx index c1148b4c887..09fa3b1c5c7 100644 --- a/python/cugraph/comms/comms_wrapper.pyx +++ b/python/cugraph/comms/comms_wrapper.pyx @@ -1,5 +1,23 @@ +# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. -from cugraph.structure.graph_primtypes cimport handle_t +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + + +from cugraph.raft.common.handle cimport * from cugraph.comms.comms cimport init_subcomms as c_init_subcomms diff --git a/python/cugraph/community/egonet.pxd b/python/cugraph/community/egonet.pxd index 3ddf929674f..cf1c84fb5f7 100644 --- a/python/cugraph/community/egonet.pxd +++ b/python/cugraph/community/egonet.pxd @@ -12,7 +12,7 @@ # limitations under the License. 
-from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": cdef unique_ptr[cy_multi_edgelists_t] call_egonet[vertex_t, weight_t]( diff --git a/python/cugraph/community/egonet_wrapper.pyx b/python/cugraph/community/egonet_wrapper.pyx index 122dedbfabd..7d2a1169e25 100644 --- a/python/cugraph/community/egonet_wrapper.pyx +++ b/python/cugraph/community/egonet_wrapper.pyx @@ -12,14 +12,12 @@ # limitations under the License. from cugraph.community.egonet cimport call_egonet -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool from libc.stdint cimport uintptr_t from cugraph.structure import graph_primtypes_wrapper import cudf -import rmm import numpy as np -import numpy.ctypeslib as ctypeslib from rmm._lib.device_buffer cimport DeviceBuffer from cudf.core.buffer import Buffer diff --git a/python/cugraph/community/ktruss_subgraph_wrapper.pyx b/python/cugraph/community/ktruss_subgraph_wrapper.pyx index 9f8138f4d57..9f38b33d774 100644 --- a/python/cugraph/community/ktruss_subgraph_wrapper.pyx +++ b/python/cugraph/community/ktruss_subgraph_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,12 +19,6 @@ from cugraph.community.ktruss_subgraph cimport * from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper -from libcpp cimport bool -from libc.stdint cimport uintptr_t -from libc.float cimport FLT_MAX_EXP - -import cudf -import rmm import numpy as np diff --git a/python/cugraph/community/leiden_wrapper.pyx b/python/cugraph/community/leiden_wrapper.pyx index 70fcfcf701b..1b41134c625 100644 --- a/python/cugraph/community/leiden_wrapper.pyx +++ b/python/cugraph/community/leiden_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -22,7 +22,6 @@ from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t import cudf -import rmm import numpy as np diff --git a/python/cugraph/community/louvain.pxd b/python/cugraph/community/louvain.pxd index eca15ba3d20..1f75c13dbaf 100644 --- a/python/cugraph/community/louvain.pxd +++ b/python/cugraph/community/louvain.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,7 +18,7 @@ from libcpp.utility cimport pair -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": diff --git a/python/cugraph/community/louvain_wrapper.pyx b/python/cugraph/community/louvain_wrapper.pyx index 6b218a0b962..c7ce4e8db66 100644 --- a/python/cugraph/community/louvain_wrapper.pyx +++ b/python/cugraph/community/louvain_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. 
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,12 +17,11 @@ # cython: language_level = 3 from cugraph.community cimport louvain as c_louvain -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t import cudf -import rmm import numpy as np diff --git a/python/cugraph/community/spectral_clustering_wrapper.pyx b/python/cugraph/community/spectral_clustering_wrapper.pyx index 0593d987c0d..7934a386bb7 100644 --- a/python/cugraph/community/spectral_clustering_wrapper.pyx +++ b/python/cugraph/community/spectral_clustering_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -23,12 +23,9 @@ from cugraph.community.spectral_clustering cimport analyzeClustering_edge_cut as from cugraph.community.spectral_clustering cimport analyzeClustering_ratio_cut as c_analyze_clustering_ratio_cut from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper -from libcpp cimport bool from libc.stdint cimport uintptr_t - import cugraph import cudf -import rmm import numpy as np diff --git a/python/cugraph/community/subgraph_extraction_wrapper.pyx b/python/cugraph/community/subgraph_extraction_wrapper.pyx index 35b3c743987..31c5d2372f0 100644 --- a/python/cugraph/community/subgraph_extraction_wrapper.pyx +++ b/python/cugraph/community/subgraph_extraction_wrapper.pyx @@ -20,9 +20,7 @@ from cugraph.community.subgraph_extraction cimport extract_subgraph_vertex as c_ from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t - import cudf -import rmm import numpy as np diff --git a/python/cugraph/community/triangle_count_wrapper.pyx b/python/cugraph/community/triangle_count_wrapper.pyx index d7cabd4676f..f1e842f9de4 100644 --- a/python/cugraph/community/triangle_count_wrapper.pyx +++ b/python/cugraph/community/triangle_count_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -21,9 +21,7 @@ from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t import numpy as np - import cudf -import rmm def triangles(input_graph): diff --git a/python/cugraph/components/connectivity_wrapper.pyx b/python/cugraph/components/connectivity_wrapper.pyx index 8b678d16ff8..76d279a8116 100644 --- a/python/cugraph/components/connectivity_wrapper.pyx +++ b/python/cugraph/components/connectivity_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -23,7 +23,6 @@ from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t from cugraph.structure.symmetrize import symmetrize from cugraph.structure.graph import Graph as type_Graph - import cudf import numpy as np diff --git a/python/cugraph/cores/core_number_wrapper.pyx b/python/cugraph/cores/core_number_wrapper.pyx index 3df1df5f8e9..9fcc3b4746c 100644 --- a/python/cugraph/cores/core_number_wrapper.pyx +++ b/python/cugraph/cores/core_number_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -20,9 +20,7 @@ cimport cugraph.cores.core_number as c_core from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t - import cudf -import rmm import numpy as np diff --git a/python/cugraph/cores/k_core_wrapper.pyx b/python/cugraph/cores/k_core_wrapper.pyx index 51ecec09dc5..a0ef99a8e8b 100644 --- a/python/cugraph/cores/k_core_wrapper.pyx +++ b/python/cugraph/cores/k_core_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,12 +19,7 @@ from cugraph.cores.k_core cimport k_core as c_k_core from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper -from libcpp cimport bool from libc.stdint cimport uintptr_t -from libc.float cimport FLT_MAX_EXP - -import cudf -import rmm import numpy as np diff --git a/python/cugraph/dask/centrality/mg_katz_centrality.pxd b/python/cugraph/dask/centrality/mg_katz_centrality.pxd index 345457b1963..fb1730da13b 100644 --- a/python/cugraph/dask/centrality/mg_katz_centrality.pxd +++ b/python/cugraph/dask/centrality/mg_katz_centrality.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ # limitations under the License. # -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool diff --git a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx index b8cab4e4286..ccae26fe7e6 100644 --- a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx +++ b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,7 +17,7 @@ from cugraph.structure.utils_wrapper import * from cugraph.dask.centrality cimport mg_katz_centrality as c_katz_centrality import cudf -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper from libc.stdint cimport uintptr_t from cython.operator cimport dereference as deref diff --git a/python/cugraph/dask/community/louvain.pxd b/python/cugraph/dask/community/louvain.pxd index b6b4cd23143..738309dac8a 100644 --- a/python/cugraph/dask/community/louvain.pxd +++ b/python/cugraph/dask/community/louvain.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,7 +17,7 @@ # cython: language_level = 3 from libcpp.utility cimport pair -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index c2a12cf81f3..f58630d07aa 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,8 +19,7 @@ from libc.stdint cimport uintptr_t from cugraph.dask.community cimport louvain as c_louvain -from cugraph.structure.graph_primtypes cimport * - +from cugraph.structure.graph_utilities cimport * import cudf import numpy as np diff --git a/python/cugraph/dask/link_analysis/mg_pagerank.pxd b/python/cugraph/dask/link_analysis/mg_pagerank.pxd index 91104d9127c..55bbc0dba7e 100644 --- a/python/cugraph/dask/link_analysis/mg_pagerank.pxd +++ b/python/cugraph/dask/link_analysis/mg_pagerank.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ # limitations under the License. # -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool @@ -31,4 +31,4 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": double alpha, double tolerance, long long max_iter, - bool has_guess) except + \ No newline at end of file + bool has_guess) except + diff --git a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx index 1cd80397b17..12f2342559b 100644 --- a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx +++ b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,7 +17,7 @@ from cugraph.structure.utils_wrapper import * from cugraph.dask.link_analysis cimport mg_pagerank as c_pagerank import cudf -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper from libc.stdint cimport uintptr_t from cython.operator cimport dereference as deref diff --git a/python/cugraph/dask/structure/renumber_wrapper.pyx b/python/cugraph/dask/structure/renumber_wrapper.pyx index 40dd80aeb67..1ab290cfb10 100644 --- a/python/cugraph/dask/structure/renumber_wrapper.pyx +++ b/python/cugraph/dask/structure/renumber_wrapper.pyx @@ -16,7 +16,7 @@ from cugraph.structure.utils_wrapper import * import cudf -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper from libc.stdint cimport uintptr_t from cython.operator cimport dereference as deref diff --git a/python/cugraph/dask/traversal/mg_bfs.pxd b/python/cugraph/dask/traversal/mg_bfs.pxd index 82c6e97d668..afd209158c4 100644 --- a/python/cugraph/dask/traversal/mg_bfs.pxd +++ b/python/cugraph/dask/traversal/mg_bfs.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ # limitations under the License. # -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool diff --git a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx index c92f28eb407..527cb2bcf0a 100644 --- a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ from cugraph.structure.utils_wrapper import * from cugraph.dask.traversal cimport mg_bfs as c_bfs import cudf -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper from libc.stdint cimport uintptr_t diff --git a/python/cugraph/dask/traversal/mg_sssp.pxd b/python/cugraph/dask/traversal/mg_sssp.pxd index f846facd269..d56575da567 100644 --- a/python/cugraph/dask/traversal/mg_sssp.pxd +++ b/python/cugraph/dask/traversal/mg_sssp.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool diff --git a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx index b7aec103098..15d956836b4 100644 --- a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. 
+# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ from cugraph.structure.utils_wrapper import * from cugraph.dask.traversal cimport mg_sssp as c_sssp import cudf -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper from libc.stdint cimport uintptr_t diff --git a/python/cugraph/layout/force_atlas2_wrapper.pyx b/python/cugraph/layout/force_atlas2_wrapper.pyx index 785ddda47bd..4515c577f78 100644 --- a/python/cugraph/layout/force_atlas2_wrapper.pyx +++ b/python/cugraph/layout/force_atlas2_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,15 +19,11 @@ from cugraph.layout.force_atlas2 cimport force_atlas2 as c_force_atlas2 from cugraph.structure import graph_primtypes_wrapper from cugraph.structure.graph_primtypes cimport * -from cugraph.structure import utils_wrapper from libcpp cimport bool from libc.stdint cimport uintptr_t - import cudf -import cudf._lib as libcudf from numba import cuda import numpy as np -import numpy.ctypeslib as ctypeslib cdef extern from "internals.hpp" namespace "cugraph::internals": cdef cppclass GraphBasedDimRedCallback diff --git a/python/cugraph/linear_assignment/lap_wrapper.pyx b/python/cugraph/linear_assignment/lap_wrapper.pyx index 0769ef42f0f..7cd2124b8d9 100644 --- a/python/cugraph/linear_assignment/lap_wrapper.pyx +++ b/python/cugraph/linear_assignment/lap_wrapper.pyx @@ -21,11 +21,10 @@ from cugraph.linear_assignment.lap cimport dense_hungarian as c_dense_hungarian from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t -from cugraph.structure.graph import Graph as type_Graph - import cudf import numpy as np + def sparse_hungarian(input_graph, workers): """ Call the hungarian algorithm diff --git a/python/cugraph/link_analysis/hits_wrapper.pyx b/python/cugraph/link_analysis/hits_wrapper.pyx index 3e19e38a023..2a2d33dea0b 100644 --- a/python/cugraph/link_analysis/hits_wrapper.pyx +++ b/python/cugraph/link_analysis/hits_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -18,13 +18,10 @@ from cugraph.link_analysis.hits cimport hits as c_hits from cugraph.structure.graph_primtypes cimport * -from libcpp cimport bool from libc.stdint cimport uintptr_t from cugraph.structure import graph_primtypes_wrapper import cudf -import rmm import numpy as np -import numpy.ctypeslib as ctypeslib def hits(input_graph, max_iter=100, tol=1.0e-5, nstart=None, normalized=True): @@ -48,8 +45,6 @@ def hits(input_graph, max_iter=100, tol=1.0e-5, nstart=None, normalized=True): df['hubs'] = cudf.Series(np.zeros(num_verts, dtype=np.float32)) df['authorities'] = cudf.Series(np.zeros(num_verts, dtype=np.float32)) - #cdef bool normalized = 1 - cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0]; cdef uintptr_t c_hubs = df['hubs'].__cuda_array_interface__['data'][0]; cdef uintptr_t c_authorities = df['authorities'].__cuda_array_interface__['data'][0]; diff --git a/python/cugraph/link_analysis/pagerank.pxd b/python/cugraph/link_analysis/pagerank.pxd index 79cb033f74b..2c8bea12016 100644 --- a/python/cugraph/link_analysis/pagerank.pxd +++ b/python/cugraph/link_analysis/pagerank.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool diff --git a/python/cugraph/link_analysis/pagerank_wrapper.pyx b/python/cugraph/link_analysis/pagerank_wrapper.pyx index fea1939db6a..81a68d42360 100644 --- a/python/cugraph/link_analysis/pagerank_wrapper.pyx +++ b/python/cugraph/link_analysis/pagerank_wrapper.pyx @@ -16,16 +16,13 @@ # cython: embedsignature = True # cython: language_level = 3 -#cimport cugraph.link_analysis.pagerank as c_pagerank from cugraph.link_analysis.pagerank cimport call_pagerank -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool from libc.stdint cimport uintptr_t from cugraph.structure import graph_primtypes_wrapper import cudf -import rmm import numpy as np -import numpy.ctypeslib as ctypeslib def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-5, nstart=None): diff --git a/python/cugraph/link_prediction/jaccard_wrapper.pyx b/python/cugraph/link_prediction/jaccard_wrapper.pyx index cacd13dec65..8d236c60ee2 100644 --- a/python/cugraph/link_prediction/jaccard_wrapper.pyx +++ b/python/cugraph/link_prediction/jaccard_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -21,8 +21,6 @@ from cugraph.link_prediction.jaccard cimport jaccard_list as c_jaccard_list from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t -from cython cimport floating - import cudf import numpy as np diff --git a/python/cugraph/link_prediction/overlap_wrapper.pyx b/python/cugraph/link_prediction/overlap_wrapper.pyx index 9e2f3ba49d7..4cb17aa21a6 100644 --- a/python/cugraph/link_prediction/overlap_wrapper.pyx +++ b/python/cugraph/link_prediction/overlap_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -21,8 +21,6 @@ from cugraph.link_prediction.overlap cimport overlap_list as c_overlap_list from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t -from cython cimport floating - import cudf import numpy as np diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index 07132df2598..3bb42633ac1 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -20,12 +20,9 @@ from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.utility cimport pair from libcpp.vector cimport vector - +from cugraph.raft.common.handle cimport * from rmm._lib.device_buffer cimport device_buffer -cdef extern from "raft/handle.hpp" namespace "raft": - cdef cppclass handle_t: - handle_t() except + cdef extern from "graph.hpp" namespace "cugraph": @@ -127,7 +124,6 @@ cdef extern from "graph.hpp" namespace "cugraph": GraphCSRView[VT,ET,WT] view() - cdef extern from "algorithms.hpp" namespace "cugraph": cdef unique_ptr[GraphCOO[VT, ET, WT]] get_two_hop_neighbors[VT,ET,WT]( @@ -144,89 +140,6 @@ cdef extern from "functions.hpp" namespace "cugraph": ET *map_size) except + -# renumber_edgelist() interface: -# -# -# 1. `cdef extern partition_t`: -# -cdef extern from "experimental/graph_view.hpp" namespace "cugraph::experimental": - - cdef cppclass partition_t[vertex_t]: - pass - - -# 2. return type for shuffle: -# -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - - cdef cppclass major_minor_weights_t[vertex_t, weight_t]: - major_minor_weights_t(const handle_t &handle) - pair[unique_ptr[device_buffer], size_t] get_major_wrap() - pair[unique_ptr[device_buffer], size_t] get_minor_wrap() - pair[unique_ptr[device_buffer], size_t] get_weights_wrap() - - -ctypedef fused shuffled_vertices_t: - major_minor_weights_t[int, float] - major_minor_weights_t[int, double] - major_minor_weights_t[long, float] - major_minor_weights_t[long, double] - -# 3. 
return type for renumber: -# -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - - cdef cppclass renum_quad_t[vertex_t, edge_t]: - renum_quad_t(const handle_t &handle) - pair[unique_ptr[device_buffer], size_t] get_dv_wrap() - vertex_t& get_num_vertices() - edge_t& get_num_edges() - int get_part_row_size() - int get_part_col_size() - int get_part_comm_rank() - unique_ptr[vector[vertex_t]] get_partition_offsets() - pair[vertex_t, vertex_t] get_part_local_vertex_range() - vertex_t get_part_local_vertex_first() - vertex_t get_part_local_vertex_last() - pair[vertex_t, vertex_t] get_part_vertex_partition_range(size_t vertex_partition_idx) - vertex_t get_part_vertex_partition_first(size_t vertex_partition_idx) - vertex_t get_part_vertex_partition_last(size_t vertex_partition_idx) - vertex_t get_part_vertex_partition_size(size_t vertex_partition_idx) - size_t get_part_number_of_matrix_partitions() - vertex_t get_part_matrix_partition_major_first(size_t partition_idx) - vertex_t get_part_matrix_partition_major_last(size_t partition_idx) - vertex_t get_part_matrix_partition_major_value_start_offset(size_t partition_idx) - pair[vertex_t, vertex_t] get_part_matrix_partition_minor_range() - vertex_t get_part_matrix_partition_minor_first() - vertex_t get_part_matrix_partition_minor_last() - -# 4. `groupby_gpuid_and_shuffle_values()` wrapper: -# -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - - cdef unique_ptr[major_minor_weights_t[vertex_t, weight_t]] call_shuffle[vertex_t, edge_t, weight_t]( - const handle_t &handle, - vertex_t *edgelist_major_vertices, - vertex_t *edgelist_minor_vertices, - weight_t* edgelist_weights, - edge_t num_edges, - bool is_hyper_partitioned) except + - - -# 5. `renumber_edgelist()` wrapper -# -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - - cdef unique_ptr[renum_quad_t[vertex_t, edge_t]] call_renumber[vertex_t, edge_t]( - const handle_t &handle, - vertex_t *edgelist_major_vertices, - vertex_t *edgelist_minor_vertices, - edge_t num_edges, - bool is_hyper_partitioned, - bool do_check, - bool multi_gpu) except + - - cdef extern from "" namespace "std" nogil: cdef unique_ptr[GraphCOO[int,int,float]] move(unique_ptr[GraphCOO[int,int,float]]) cdef unique_ptr[GraphCOO[int,int,double]] move(unique_ptr[GraphCOO[int,int,double]]) @@ -275,67 +188,3 @@ ctypedef fused GraphViewType: cdef coo_to_df(GraphCOOPtrType graph) cdef csr_to_series(GraphCSRPtrType graph) cdef GraphViewType get_graph_view(input_graph, bool weightless=*, GraphViewType* dummy=*) - - -# C++ utilities specifically for Cython -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - - ctypedef enum numberTypeEnum: - int32Type "cugraph::cython::numberTypeEnum::int32Type" - int64Type "cugraph::cython::numberTypeEnum::int64Type" - floatType "cugraph::cython::numberTypeEnum::floatType" - doubleType "cugraph::cython::numberTypeEnum::doubleType" - - cdef cppclass graph_container_t: - pass - - cdef void populate_graph_container( - graph_container_t &graph_container, - handle_t &handle, - void *src_vertices, - void *dst_vertices, - void *weights, - void *vertex_partition_offsets, - numberTypeEnum vertexType, - numberTypeEnum edgeType, - numberTypeEnum weightType, - size_t num_partition_edges, - size_t num_global_vertices, - size_t num_global_edges, - bool sorted_by_degree, - bool transposed, - bool multi_gpu) except + - - ctypedef enum graphTypeEnum: - LegacyCSR "cugraph::cython::graphTypeEnum::LegacyCSR" - LegacyCSC 
"cugraph::cython::graphTypeEnum::LegacyCSC" - LegacyCOO "cugraph::cython::graphTypeEnum::LegacyCOO" - - cdef void populate_graph_container_legacy( - graph_container_t &graph_container, - graphTypeEnum legacyType, - const handle_t &handle, - void *offsets, - void *indices, - void *weights, - numberTypeEnum offsetType, - numberTypeEnum indexType, - numberTypeEnum weightType, - size_t num_global_vertices, - size_t num_global_edges, - int *local_vertices, - int *local_edges, - int *local_offsets) except + - - cdef cppclass cy_multi_edgelists_t: - size_t number_of_vertices - size_t number_of_edges - size_t number_of_subgraph - unique_ptr[device_buffer] src_indices - unique_ptr[device_buffer] dst_indices - unique_ptr[device_buffer] edge_data - unique_ptr[device_buffer] subgraph_offsets - -cdef extern from "" namespace "std" nogil: - cdef cy_multi_edgelists_t move(cy_multi_edgelists_t) - cdef unique_ptr[cy_multi_edgelists_t] move(unique_ptr[cy_multi_edgelists_t]) diff --git a/python/cugraph/structure/graph_utilities.pxd b/python/cugraph/structure/graph_utilities.pxd new file mode 100644 index 00000000000..10c90f44cb8 --- /dev/null +++ b/python/cugraph/structure/graph_utilities.pxd @@ -0,0 +1,173 @@ +# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + + +from cugraph.raft.common.handle cimport * +from libcpp cimport bool +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport pair +from libcpp.vector cimport vector +from rmm._lib.device_buffer cimport device_buffer + +# C++ graph utilities +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + ctypedef enum numberTypeEnum: + int32Type "cugraph::cython::numberTypeEnum::int32Type" + int64Type "cugraph::cython::numberTypeEnum::int64Type" + floatType "cugraph::cython::numberTypeEnum::floatType" + doubleType "cugraph::cython::numberTypeEnum::doubleType" + + cdef cppclass graph_container_t: + pass + + cdef void populate_graph_container( + graph_container_t &graph_container, + handle_t &handle, + void *src_vertices, + void *dst_vertices, + void *weights, + void *vertex_partition_offsets, + numberTypeEnum vertexType, + numberTypeEnum edgeType, + numberTypeEnum weightType, + size_t num_partition_edges, + size_t num_global_vertices, + size_t num_global_edges, + bool sorted_by_degree, + bool transposed, + bool multi_gpu) except + + + ctypedef enum graphTypeEnum: + LegacyCSR "cugraph::cython::graphTypeEnum::LegacyCSR" + LegacyCSC "cugraph::cython::graphTypeEnum::LegacyCSC" + LegacyCOO "cugraph::cython::graphTypeEnum::LegacyCOO" + + cdef void populate_graph_container_legacy( + graph_container_t &graph_container, + graphTypeEnum legacyType, + const handle_t &handle, + void *offsets, + void *indices, + void *weights, + numberTypeEnum offsetType, + numberTypeEnum indexType, + numberTypeEnum weightType, + size_t num_global_vertices, + size_t num_global_edges, + int *local_vertices, + int *local_edges, + int *local_offsets) except + + + cdef cppclass cy_multi_edgelists_t: + size_t number_of_vertices + size_t number_of_edges + size_t number_of_subgraph + unique_ptr[device_buffer] src_indices + unique_ptr[device_buffer] dst_indices + unique_ptr[device_buffer] edge_data + unique_ptr[device_buffer] subgraph_offsets + +cdef extern from "" namespace "std" nogil: + cdef device_buffer move(device_buffer) + cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer]) + cdef cy_multi_edgelists_t move(cy_multi_edgelists_t) + cdef unique_ptr[cy_multi_edgelists_t] move(unique_ptr[cy_multi_edgelists_t]) + #cdef device_buffer move(device_buffer) + #cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer]) + +# renumber_edgelist() interface utilities: +# +# +# 1. `cdef extern partition_t`: +# +cdef extern from "experimental/graph_view.hpp" namespace "cugraph::experimental": + + cdef cppclass partition_t[vertex_t]: + pass + + +# 2. return type for shuffle: +# +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef cppclass major_minor_weights_t[vertex_t, weight_t]: + major_minor_weights_t(const handle_t &handle) + pair[unique_ptr[device_buffer], size_t] get_major_wrap() + pair[unique_ptr[device_buffer], size_t] get_minor_wrap() + pair[unique_ptr[device_buffer], size_t] get_weights_wrap() + + +ctypedef fused shuffled_vertices_t: + major_minor_weights_t[int, float] + major_minor_weights_t[int, double] + major_minor_weights_t[long, float] + major_minor_weights_t[long, double] + +# 3. 
return type for renumber: +# +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef cppclass renum_quad_t[vertex_t, edge_t]: + renum_quad_t(const handle_t &handle) + pair[unique_ptr[device_buffer], size_t] get_dv_wrap() + vertex_t& get_num_vertices() + edge_t& get_num_edges() + int get_part_row_size() + int get_part_col_size() + int get_part_comm_rank() + unique_ptr[vector[vertex_t]] get_partition_offsets() + pair[vertex_t, vertex_t] get_part_local_vertex_range() + vertex_t get_part_local_vertex_first() + vertex_t get_part_local_vertex_last() + pair[vertex_t, vertex_t] get_part_vertex_partition_range(size_t vertex_partition_idx) + vertex_t get_part_vertex_partition_first(size_t vertex_partition_idx) + vertex_t get_part_vertex_partition_last(size_t vertex_partition_idx) + vertex_t get_part_vertex_partition_size(size_t vertex_partition_idx) + size_t get_part_number_of_matrix_partitions() + vertex_t get_part_matrix_partition_major_first(size_t partition_idx) + vertex_t get_part_matrix_partition_major_last(size_t partition_idx) + vertex_t get_part_matrix_partition_major_value_start_offset(size_t partition_idx) + pair[vertex_t, vertex_t] get_part_matrix_partition_minor_range() + vertex_t get_part_matrix_partition_minor_first() + vertex_t get_part_matrix_partition_minor_last() + +# 4. `sort_and_shuffle_values()` wrapper: +# +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef unique_ptr[major_minor_weights_t[vertex_t, weight_t]] call_shuffle[vertex_t, edge_t, weight_t]( + const handle_t &handle, + vertex_t *edgelist_major_vertices, + vertex_t *edgelist_minor_vertices, + weight_t* edgelist_weights, + edge_t num_edges, + bool is_hyper_partitioned) except + + +# 5. `renumber_edgelist()` wrapper +# +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef unique_ptr[renum_quad_t[vertex_t, edge_t]] call_renumber[vertex_t, edge_t]( + const handle_t &handle, + vertex_t *edgelist_major_vertices, + vertex_t *edgelist_minor_vertices, + edge_t num_edges, + bool is_hyper_partitioned, + bool do_check, + bool multi_gpu) except + diff --git a/python/cugraph/structure/utils.pxd b/python/cugraph/structure/utils.pxd index 0ec9c914347..c22e64841af 100644 --- a/python/cugraph/structure/utils.pxd +++ b/python/cugraph/structure/utils.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,9 +19,6 @@ from cugraph.structure.graph_primtypes cimport * from libcpp.memory cimport unique_ptr -cdef extern from "raft/handle.hpp" namespace "raft": - cdef cppclass handle_t: - handle_t() except + cdef extern from "functions.hpp" namespace "cugraph": diff --git a/python/cugraph/structure/utils_wrapper.pyx b/python/cugraph/structure/utils_wrapper.pyx index 00af5813056..65c1ca09750 100644 --- a/python/cugraph/structure/utils_wrapper.pyx +++ b/python/cugraph/structure/utils_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -22,7 +22,6 @@ from cugraph.structure.graph_primtypes cimport * from libc.stdint cimport uintptr_t import cudf -import rmm import numpy as np from rmm._lib.device_buffer cimport DeviceBuffer from cudf.core.buffer import Buffer diff --git a/python/cugraph/traversal/bfs.pxd b/python/cugraph/traversal/bfs.pxd index 5b73d23045c..0467bf05090 100644 --- a/python/cugraph/traversal/bfs.pxd +++ b/python/cugraph/traversal/bfs.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from libcpp cimport bool diff --git a/python/cugraph/traversal/bfs_wrapper.pyx b/python/cugraph/traversal/bfs_wrapper.pyx index ae346aea953..f475842a7bf 100644 --- a/python/cugraph/traversal/bfs_wrapper.pyx +++ b/python/cugraph/traversal/bfs_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,14 +17,11 @@ # cython: language_level = 3 cimport cugraph.traversal.bfs as c_bfs -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from cugraph.structure import graph_primtypes_wrapper from libcpp cimport bool from libc.stdint cimport uintptr_t -from libc.float cimport FLT_MAX_EXP - import cudf -import rmm import numpy as np def bfs(input_graph, start, directed=True, diff --git a/python/cugraph/traversal/sssp.pxd b/python/cugraph/traversal/sssp.pxd index e4b709cb879..59253a5f1e4 100644 --- a/python/cugraph/traversal/sssp.pxd +++ b/python/cugraph/traversal/sssp.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": diff --git a/python/cugraph/traversal/sssp_wrapper.pyx b/python/cugraph/traversal/sssp_wrapper.pyx index 730fe0db94e..36e4797e0c8 100644 --- a/python/cugraph/traversal/sssp_wrapper.pyx +++ b/python/cugraph/traversal/sssp_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -18,17 +18,14 @@ cimport cugraph.traversal.sssp as c_sssp cimport cugraph.traversal.bfs as c_bfs -from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from cugraph.structure import graph_primtypes_wrapper - from libcpp cimport bool from libc.stdint cimport uintptr_t -from libc.float cimport FLT_MAX_EXP - import cudf -import rmm import numpy as np + def sssp(input_graph, source): """ Call sssp diff --git a/python/cugraph/traversal/traveling_salesperson_wrapper.pyx b/python/cugraph/traversal/traveling_salesperson_wrapper.pyx index 5f87c42a638..6eccce57a37 100644 --- a/python/cugraph/traversal/traveling_salesperson_wrapper.pyx +++ b/python/cugraph/traversal/traveling_salesperson_wrapper.pyx @@ -22,7 +22,6 @@ from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool from libc.stdint cimport uintptr_t from numba import cuda - import cudf import numpy as np From b7e68092f09f0062e632b22bea14e3175cbc4284 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Thu, 4 Mar 2021 16:21:17 -0500 Subject: [PATCH 14/51] Create cuGraph developers guide (#1431) We have long needed a developer's guide. This PR creates one. The expectation is that this document will be adapted over time. I have copied the version from cuDF, edited things to make it cuGraph focused. There are documentation holes here that will need to eventually be filled in. However, it's probably useful to get something with holes in it that is approved so we can move forward with some documented standard. Authors: - Chuck Hastings (@ChuckHastings) Approvers: - Rick Ratzel (@rlratzel) - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1431 --- cpp/docs/DEVELOPER_GUIDE.md | 277 ++++++++++++++++++++++++++++++++++++ 1 file changed, 277 insertions(+) create mode 100644 cpp/docs/DEVELOPER_GUIDE.md diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md new file mode 100644 index 00000000000..ba24d68aca5 --- /dev/null +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -0,0 +1,277 @@ +# cuGraph C++ Developer Guide + +This document serves as a guide for contributors to cuGraph C++ code. Developers should also refer +to these additional files for further documentation of cuGraph best practices. + +* [Documentation Guide](TODO) for guidelines on documenting cuGraph code. +* [Testing Guide](TODO) for guidelines on writing unit tests. +* [Benchmarking Guide](TODO) for guidelines on writing unit benchmarks. + +# Overview + +cuGraph includes a C++ library that provides GPU-accelerated graph algorithms for processing +sparse graphs. + +## Lexicon + +This section defines terminology used within cuGraph + +### COO + +COOrdinate format is one of the standard formats for representing graph data. In COO format the +graph is represented as an array of source vertex ids, an array of destination vertex ids, and an +optional array of edge weights. Edge i is identified by source_vertex_id[i], destination_vertex_id[i] +and weight[i]. + +### MORE + +# Directory Structure and File Naming + +External/public cuGraph APIs are grouped based on functionality into an appropriately titled +header file in `cugraph/cpp/include/`. For example, `cugraph/cpp/include/graph.hpp` +contains the definition of the (legacy) graph objects. Note the `.hpp` +file extension used to indicate a C++ header file. + +Header files should use the `#pragma once` include guard. 
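+
+As a purely illustrative sketch (the file and function names below are hypothetical), a new public
+header added under `cugraph/cpp/include/` would follow this shape:
+
+```c++
+// cugraph/cpp/include/example_api.hpp
+#pragma once
+
+namespace cugraph {
+
+// Public API declarations live in the cugraph namespace; definitions go in a .cpp/.cu source file.
+void example_public_function();
+
+}  // namespace cugraph
+```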
+ +## File extensions + +- `.hpp` : C++ header files +- `.cpp` : C++ source files +- `.cu` : CUDA C++ source files +- `.cuh` : Headers containing CUDA device code + +Header files and source files should use `.hpp` and `.cpp` extensions unless they must +be compiled by nvcc. `.cu` and `.cuh` files are more expensive to compile, so we want +to minimize the use of these files to only when necessary. A good indicator of the need +to use a `.cu` or `.cuh` file is the inclusion of `__device__` and other +symbols that are only recognized by `nvcc`. Another indicator is Thrust +algorithm APIs with a device execution policy (always `rmm::exec_policy` in cuGraph). + +## Code and Documentation Style and Formatting + +cuGraph code uses [snake_case](https://en.wikipedia.org/wiki/Snake_case) for all names except in a +few cases: unit tests and test case names may use Pascal case, aka +[UpperCamelCase](https://en.wikipedia.org/wiki/Camel_case). We do not use +[Hungarian notation](https://en.wikipedia.org/wiki/Hungarian_notation), except for the following examples: + * device data variables should be prefaced by d_ if it makes the intent clearer + * host data variables should be prefaced by h_ if it makes the intent clearer + * template parameters defining a type should be suffixed with _t + * private member variables are typically suffixed with an underscore + +```c++ +template +void algorithm_function(graph_t const &g) +{ + ... +} + +template +class utility_class +{ + ... + private: + vertex_t num_vertices_{}; +} +``` + +C++ formatting is enforced using `clang-format`. You should configure `clang-format` on your +machine to use the `cugraph/cpp/.clang-format` configuration file, and run `clang-format` on all +changed code before committing it. The easiest way to do this is to configure your editor to +"format on save". + +Aspects of code style not discussed in this document and not automatically enforceable are typically +caught during code review, or not enforced. + +### C++ Guidelines + +In general, we recommend following +[C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines). We also +recommend watching Sean Parent's [C++ Seasoning talk](https://www.youtube.com/watch?v=W2tWOdzgXHA), +and we try to follow his rules: "No raw loops. No raw pointers. No raw synchronization primitives." + + * Prefer algorithms from STL and Thrust to raw loops. + * Prefer cugraph and RMM to raw pointers and raw memory allocation. + +Documentation is discussed in the [Documentation Guide](TODO). + +### Includes + +The following guidelines apply to organizing `#include` lines. + + * Group includes by library (e.g. cuGraph, RMM, Thrust, STL). `clang-format` will respect the + groupings and sort the individual includes within a group lexicographically. + * Separate groups by a blank line. + * Order the groups from "nearest" to "farthest". In other words, local includes, then includes + from other RAPIDS libraries, then includes from related libraries, like ``, then + includes from dependencies installed with cuGraph, and then standard headers (for example ``, + ``). + * Use <> instead of "" unless the header is in the same directory as the source file. + * Tools like `clangd` often auto-insert includes when they can, but they usually get the grouping + and brackets wrong. + * Always check that includes are only necessary for the file in which they are included. + Try to avoid excessive including especially in header files. Double check this when you remove + code. 
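+
+For illustration only, an include block that follows these grouping rules might look like the
+sketch below; the specific headers named here are placeholders rather than a required set.
+
+```c++
+// Same-directory helper: quoted include.
+#include "local_helpers.cuh"
+
+// Other cuGraph headers.
+#include <utilities/error.hpp>
+
+// Other RAPIDS libraries (RMM), then related libraries (raft), then dependencies (Thrust).
+#include <rmm/device_uvector.hpp>
+
+#include <raft/handle.hpp>
+
+#include <thrust/transform.h>
+
+// Standard headers last.
+#include <vector>
+```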
+ +# cuGraph Data Structures + +Application data in cuGraph is contained in graph objects, but there are a variety of other +data structures you will use when developing cuGraph code. + +## Views and Ownership + +Resource ownership is an essential concept in cuGraph. In short, an "owning" object owns a +resource (such as device memory). It acquires that resource during construction and releases the +resource in destruction ([RAII](https://en.cppreference.com/w/cpp/language/raii)). A "non-owning" +object does not own resources. Any class in cuGraph with the `*_view` suffix is non-owning. + +## `rmm::device_memory_resource` + +cuGraph allocates all device memory via RMM memory resources (MR). See the +[RMM documentation](https://github.com/rapidsai/rmm/blob/main/README.md) for details. + +## Streams + +CUDA streams are not yet exposed in external cuGraph APIs. + +We are currently investigating the best technique for exposing this. + +### Memory Management + +cuGraph code generally eschews raw pointers and direct memory allocation. Use RMM classes built to +use `device_memory_resource`(*)s for device memory allocation with automated lifetime management. + +#### `rmm::device_buffer` +Allocates a specified number of bytes of untyped, uninitialized device memory using a +`device_memory_resource`. If no resource is explicitly provided, uses +`rmm::mr::get_current_device_resource()`. + +`rmm::device_buffer` is copyable and movable. A copy performs a deep copy of the `device_buffer`'s +device memory, whereas a move moves ownership of the device memory from one `device_buffer` to +another. + +```c++ +// Allocates at least 100 bytes of uninitialized device memory +// using the specified resource and stream +rmm::device_buffer buff(100, stream, mr); +void * raw_data = buff.data(); // Raw pointer to underlying device memory + +rmm::device_buffer copy(buff); // Deep copies `buff` into `copy` +rmm::device_buffer moved_to(std::move(buff)); // Moves contents of `buff` into `moved_to` + +custom_memory_resource *mr...; +rmm::device_buffer custom_buff(100, mr); // Allocates 100 bytes from the custom_memory_resource +``` + +#### `rmm::device_uvector` + +Similar to a `rmm::device_vector`, allocates a contiguous set of elements in device memory but with key +differences: +- As an optimization, elements are uninitialized and no synchronization occurs at construction. +This limits the types `T` to trivially copyable types. +- All operations are stream ordered (i.e., they accept a `cuda_stream_view` specifying the stream +on which the operation is performed). + +## Namespaces + +### External +All public cuGraph APIs should be placed in the `cugraph` namespace. Example: +```c++ +namespace cugraph{ + void public_function(...); +} // namespace cugraph +``` + +### Internal + +Many functions are not meant for public use, so place them in either the `detail` or an *anonymous* +namespace, depending on the situation. + +#### `detail` namespace + +Functions or objects that will be used across *multiple* translation units (i.e., source files), +should be exposed in an internal header file and placed in the `detail` namespace. Example: + +```c++ +// some_utilities.hpp +namespace cugraph{ +namespace detail{ +void reusable_helper_function(...); +} // namespace detail +} // namespace cugraph +``` + +#### Anonymous namespace + +Functions or objects that will only be used in a *single* translation unit should be defined in an +*anonymous* namespace in the source file where it is used. 
Example: + +```c++ +// some_file.cpp +namespace{ +void isolated_helper_function(...); +} // anonymous namespace +``` + +[**Anonymous namespaces should *never* be used in a header file.**](https://wiki.sei.cmu.edu/confluence/display/cplusplus/DCL59-CPP.+Do+not+define+an+unnamed+namespace+in+a+header+file) + +# Error Handling + +cuGraph follows conventions (and provides utilities) enforcing compile-time and run-time +conditions and detecting and handling CUDA errors. Communication of errors is always via C++ +exceptions. + +## Runtime Conditions + +Use the `CUGRAPH_EXPECTS` macro to enforce runtime conditions necessary for correct execution. + +Example usage: +```c++ +CUGRAPH_EXPECTS(lhs.type() == rhs.type(), "Column type mismatch"); +``` + +The first argument is the conditional expression expected to resolve to `true` under normal +conditions. If the conditional evaluates to `false`, then an error has occurred and an instance of `cugraph::logic_error` is thrown. The second argument to `CUGRAPH_EXPECTS` is a short description of the +error that has occurred and is used for the exception's `what()` message. + +There are times where a particular code path, if reached, should indicate an error no matter what. +For example, often the `default` case of a `switch` statement represents an invalid alternative. +Use the `CUGRAPH_FAIL` macro for such errors. This is effectively the same as calling +`CUGRAPH_EXPECTS(false, reason)`. + +Example: +```c++ +CUGRAPH_FAIL("This code path should not be reached."); +``` + +### CUDA Error Checking + +Use the `CUDA_TRY` macro to check for the successful completion of CUDA runtime API functions. This +macro throws a `cugraph::cuda_error` exception if the CUDA API return value is not `cudaSuccess`. The +thrown exception includes a description of the CUDA error code in it's `what()` message. + +Example: + +```c++ +CUDA_TRY( cudaMemcpy(&dst, &src, num_bytes) ); +``` + +## Compile-Time Conditions + +Use `static_assert` to enforce compile-time conditions. For example, + +```c++ +template +void trivial_types_only(T t){ + static_assert(std::is_trivial::value, "This function requires a trivial type."); +... +} +``` + +# Data Types + +TBD + +# Type Dispatcher + +TBD From e5250403fd0f4f2593ab829c57358152bc879306 Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Fri, 5 Mar 2021 08:15:52 -0600 Subject: [PATCH 15/51] Streams infra + support in egonet (#1435) - Stream synchronization behavior: switched to per-thread default stream instead of the legacy default stream - Update raft tag - EgoNet upgrade to use `uvector` instead of `device_vector` - EgoNet upgrade to execute on a different stream for each seed - Perf analysis timers/app for EgoNet Concurrency is limited by the number of available blocks on the device. Thrust-based codes may request a lot of blocks without a way to control this. In practice, smaller graphs leverage concurrency better than larger ones where tasks may end up waiting for available resources. 
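
The gist of the per-seed stream pattern (condensed from the egonet.cu changes in this PR; the BFS
details and temporaries are omitted, so treat this as a sketch rather than the exact code):

```c++
for (vertex_t i = 0; i < n_subgraphs; i++) {
  // Borrow worker stream i from the handle's internal pool.
  raft::handle_t light_handle(handle, i);
  auto worker_stream_view = light_handle.get_stream_view();

  // Allocations and kernels for this seed are ordered on its worker stream,
  // so the bounded BFS for different seeds can overlap on the device.
  rmm::device_uvector<vertex_t> local_reach(v, worker_stream_view);
  // ... run BFS with cutoff for seed i through light_handle ...
}
// The main handle waits for all worker streams before the grouped extraction.
handle.wait_on_internal_streams();
```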
We may wait on #1407 before reconciling and merging this Close #957 Authors: - Alex Fender (@afender) Approvers: - Seunghwa Kang (@seunghwak) - Andrei Schaffer (@aschaffer) URL: https://github.com/rapidsai/cugraph/pull/1435 --- cpp/CMakeLists.txt | 8 +- cpp/include/algorithms.hpp | 2 +- cpp/src/community/egonet.cu | 116 +++++++++---- cpp/src/experimental/induced_subgraph.cu | 11 +- cpp/src/utilities/high_res_timer.hpp | 4 +- cpp/tests/community/egonet_test.cu | 174 ++++++++++++++++++- python/cugraph/community/egonet_wrapper.pyx | 6 +- python/cugraph/structure/graph_primtypes.pxd | 1 - 8 files changed, 276 insertions(+), 46 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 108cb0748a8..b0365c3cfd6 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -113,7 +113,6 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed- set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas --disable-warnings") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable") - # Option to enable line info in CUDA device compilation to allow introspection when profiling / # memchecking option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler" OFF) @@ -298,7 +297,8 @@ else(DEFINED ENV{RAFT_PATH}) FetchContent_Declare( raft GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG a3461b201ea1c9f61571f1927274f739e775d2d2 + GIT_TAG b055cf862a599fd45537d21a309edd8a6e06da4c + SOURCE_SUBDIR raft ) @@ -446,6 +446,10 @@ target_link_directories(cugraph # add_dependencies(cugraph gunrock_ext) +# Per-thread default stream option see https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html +# The per-thread default stream does not synchronize with other streams +target_compile_definitions(cugraph PUBLIC CUDA_API_PER_THREAD_DEFAULT_STREAM) + ################################################################################################### # - include paths --------------------------------------------------------------------------------- target_include_directories(cugraph diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index 0b8bd59587f..c3a4f3ec985 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -1167,7 +1167,7 @@ void katz_centrality(raft::handle_t const &handle, * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. + * handles to various CUDA libraries) to run graph algorithms. Must have at least one worker stream. * @param graph_view Graph view object of, we extract induced egonet subgraphs from @p graph_view. * @param source_vertex Pointer to egonet center vertices (size == @p n_subgraphs). * @param n_subgraphs Number of induced EgoNet subgraphs to extract (ie. 
number of elements in @p diff --git a/cpp/src/community/egonet.cu b/cpp/src/community/egonet.cu index fa788aa307b..067d27f9a92 100644 --- a/cpp/src/community/egonet.cu +++ b/cpp/src/community/egonet.cu @@ -22,6 +22,9 @@ #include #include +#include +#include + #include #include @@ -34,6 +37,8 @@ #include #include +#include + namespace { /* @@ -61,58 +66,111 @@ extract( vertex_t n_subgraphs, vertex_t radius) { - auto v = csr_view.get_number_of_vertices(); - auto e = csr_view.get_number_of_edges(); - auto stream = handle.get_stream(); - float avg_degree = e / v; + auto v = csr_view.get_number_of_vertices(); + auto e = csr_view.get_number_of_edges(); + auto user_stream_view = handle.get_stream_view(); rmm::device_vector neighbors_offsets(n_subgraphs + 1); rmm::device_vector neighbors; - // It is the right thing to accept device memory for source_vertex - // FIXME consider adding a device API to BFS (ie. accept source on the device) std::vector h_source_vertex(n_subgraphs); - raft::update_host(&h_source_vertex[0], source_vertex, n_subgraphs, stream); + std::vector h_neighbors_offsets(n_subgraphs + 1); + + raft::update_host(&h_source_vertex[0], source_vertex, n_subgraphs, user_stream_view.value()); + + // Streams will allocate concurrently later + std::vector> reached{}; + reached.reserve(handle.get_num_internal_streams()); - // reserve some reasonable memory, but could grow larger than that - neighbors.reserve(v + avg_degree * n_subgraphs * radius); - neighbors_offsets[0] = 0; - // each source should be done concurently in the future + // h_source_vertex[i] is used by other streams in the for loop + user_stream_view.synchronize(); +#ifdef TIMING + HighResTimer hr_timer; + hr_timer.start("ego_neighbors"); +#endif for (vertex_t i = 0; i < n_subgraphs; i++) { + // get light handle from worker pool + raft::handle_t light_handle(handle, i); + auto worker_stream_view = light_handle.get_stream_view(); + + // Allocations and operations are attached to the worker stream + rmm::device_uvector local_reach(v, worker_stream_view); + reached.push_back(std::move(local_reach)); + // BFS with cutoff - rmm::device_vector reached(v); - rmm::device_vector predecessors(v); // not used + // consider adding a device API to BFS (ie. accept source on the device) + rmm::device_uvector predecessors(v, worker_stream_view); // not used bool direction_optimizing = false; - cugraph::experimental::bfs(handle, + thrust::fill(rmm::exec_policy(worker_stream_view), + reached[i].begin(), + reached[i].end(), + std::numeric_limits::max()); + thrust::fill( + rmm::exec_policy(worker_stream_view), reached[i].begin(), reached[i].begin() + 100, 1.0); + + cugraph::experimental::bfs(light_handle, csr_view, - reached.data().get(), - predecessors.data().get(), + reached[i].data(), + predecessors.data(), h_source_vertex[i], direction_optimizing, radius); // identify reached vertex ids from distance array - thrust::transform(rmm::exec_policy(stream)->on(stream), + thrust::transform(rmm::exec_policy(worker_stream_view), thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(v), - reached.begin(), - reached.begin(), + reached[i].begin(), + reached[i].begin(), [sentinel = std::numeric_limits::max()] __device__( auto id, auto val) { return val < sentinel ? 
id : sentinel; }); // removes unreached data - auto reached_end = thrust::remove(rmm::exec_policy(stream)->on(stream), - reached.begin(), - reached.end(), + auto reached_end = thrust::remove(rmm::exec_policy(worker_stream_view), + reached[i].begin(), + reached[i].end(), std::numeric_limits::max()); + // release temp storage + reached[i].resize(thrust::distance(reached[i].begin(), reached_end), worker_stream_view); + reached[i].shrink_to_fit(worker_stream_view); + } - // update extraction input - size_t n_reached = thrust::distance(reached.begin(), reached_end); - neighbors_offsets[i + 1] = neighbors_offsets[i] + n_reached; - if (neighbors_offsets[i + 1] > neighbors.capacity()) - neighbors.reserve(neighbors_offsets[i + 1] * 2); - neighbors.insert(neighbors.end(), reached.begin(), reached_end); + // wait on every one to identify their neighboors before proceeding to concatenation + handle.wait_on_internal_streams(); + + // Construct neighboors offsets (just a scan on neighborhod vector sizes) + h_neighbors_offsets[0] = 0; + for (vertex_t i = 0; i < n_subgraphs; i++) { + h_neighbors_offsets[i + 1] = h_neighbors_offsets[i] + reached[i].size(); + } + raft::update_device(neighbors_offsets.data().get(), + &h_neighbors_offsets[0], + n_subgraphs + 1, + user_stream_view.value()); + neighbors.resize(h_neighbors_offsets[n_subgraphs]); + user_stream_view.synchronize(); + + // Construct the neighboors list concurrently + for (vertex_t i = 0; i < n_subgraphs; i++) { + raft::handle_t light_handle(handle, i); + auto worker_stream_view = light_handle.get_stream_view(); + thrust::copy(rmm::exec_policy(worker_stream_view), + reached[i].begin(), + reached[i].end(), + neighbors.begin() + h_neighbors_offsets[i]); + + // reached info is not needed anymore + reached[i].resize(0, worker_stream_view); + reached[i].shrink_to_fit(worker_stream_view); } + // wait on every one before proceeding to grouped extraction + handle.wait_on_internal_streams(); + +#ifdef TIMING + hr_timer.stop(); + hr_timer.display(std::cout); +#endif + // extract return cugraph::experimental::extract_induced_subgraphs( handle, csr_view, neighbors_offsets.data().get(), neighbors.data().get(), n_subgraphs); @@ -207,4 +265,4 @@ extract_ego(raft::handle_t const &, int64_t, int64_t); } // namespace experimental -} // namespace cugraph +} // namespace cugraph \ No newline at end of file diff --git a/cpp/src/experimental/induced_subgraph.cu b/cpp/src/experimental/induced_subgraph.cu index a88adf76ef4..5cda36ad7e2 100644 --- a/cpp/src/experimental/induced_subgraph.cu +++ b/cpp/src/experimental/induced_subgraph.cu @@ -32,6 +32,8 @@ #include +#include + namespace cugraph { namespace experimental { @@ -52,6 +54,10 @@ extract_induced_subgraphs( size_t num_subgraphs, bool do_expensive_check) { +#ifdef TIMING + HighResTimer hr_timer; + hr_timer.start("extract_induced_subgraphs"); +#endif // FIXME: this code is inefficient for the vertices with their local degrees much larger than the // number of vertices in the subgraphs (in this case, searching that the subgraph vertices are // included in the local neighbors is more efficient than searching the local neighbors are @@ -244,7 +250,10 @@ extract_induced_subgraphs( subgraph_offsets + (num_subgraphs + 1), subgraph_vertex_output_offsets.begin(), subgraph_edge_offsets.begin()); - +#ifdef TIMING + hr_timer.stop(); + hr_timer.display(std::cout); +#endif return std::make_tuple(std::move(edge_majors), std::move(edge_minors), std::move(edge_weights), diff --git a/cpp/src/utilities/high_res_timer.hpp 
b/cpp/src/utilities/high_res_timer.hpp index f2d6bc6e13f..a731c5edc9d 100644 --- a/cpp/src/utilities/high_res_timer.hpp +++ b/cpp/src/utilities/high_res_timer.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,8 @@ #include #include +//#define TIMING + class HighResTimer { public: HighResTimer() : timers() {} diff --git a/cpp/tests/community/egonet_test.cu b/cpp/tests/community/egonet_test.cu index ef2699bd1d0..a9224b42bc1 100644 --- a/cpp/tests/community/egonet_test.cu +++ b/cpp/tests/community/egonet_test.cu @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -35,6 +36,8 @@ #include #include +#include + typedef struct InducedEgo_Usecase_t { std::string graph_file_full_path{}; std::vector ego_sources{}; @@ -67,7 +70,8 @@ class Tests_InducedEgo : public ::testing::TestWithParam { template void run_current_test(InducedEgo_Usecase const& configuration) { - raft::handle_t handle{}; + int n_streams = std::min(configuration.ego_sources.size(), static_cast(128)); + raft::handle_t handle(n_streams); cugraph::experimental::graph_t graph( handle); @@ -88,14 +92,18 @@ class Tests_InducedEgo : public ::testing::TestWithParam { rmm::device_uvector d_ego_edgelist_dst(0, handle.get_stream()); rmm::device_uvector d_ego_edgelist_weights(0, handle.get_stream()); rmm::device_uvector d_ego_edge_offsets(0, handle.get_stream()); - + HighResTimer hr_timer; + hr_timer.start("egonet"); + cudaProfilerStart(); std::tie(d_ego_edgelist_src, d_ego_edgelist_dst, d_ego_edgelist_weights, d_ego_edge_offsets) = cugraph::experimental::extract_ego(handle, graph_view, d_ego_sources.data(), static_cast(configuration.ego_sources.size()), configuration.radius); - + cudaProfilerStop(); + hr_timer.stop(); + hr_timer.display(std::cout); std::vector h_cugraph_ego_edge_offsets(d_ego_edge_offsets.size()); std::vector h_cugraph_ego_edgelist_src(d_ego_edgelist_src.size()); std::vector h_cugraph_ego_edgelist_dst(d_ego_edgelist_dst.size()); @@ -118,13 +126,11 @@ class Tests_InducedEgo : public ::testing::TestWithParam { ASSERT_TRUE(h_cugraph_ego_edge_offsets[configuration.ego_sources.size()] == d_ego_edgelist_src.size()); for (size_t i = 0; i < configuration.ego_sources.size(); i++) - ASSERT_TRUE(h_cugraph_ego_edge_offsets[i] < h_cugraph_ego_edge_offsets[i + 1]); + ASSERT_TRUE(h_cugraph_ego_edge_offsets[i] <= h_cugraph_ego_edge_offsets[i + 1]); auto n_vertices = graph_view.get_number_of_vertices(); for (size_t i = 0; i < d_ego_edgelist_src.size(); i++) { - ASSERT_TRUE(h_cugraph_ego_edgelist_src[i] >= 0); - ASSERT_TRUE(h_cugraph_ego_edgelist_src[i] < n_vertices); - ASSERT_TRUE(h_cugraph_ego_edgelist_dst[i] >= 0); - ASSERT_TRUE(h_cugraph_ego_edgelist_dst[i] < n_vertices); + ASSERT_TRUE(cugraph::test::is_valid_vertex(n_vertices, h_cugraph_ego_edgelist_src[i])); + ASSERT_TRUE(cugraph::test::is_valid_vertex(n_vertices, h_cugraph_ego_edgelist_dst[i])); } /* @@ -170,6 +176,156 @@ INSTANTIATE_TEST_CASE_P( InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{1}, 3, false), InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{10, 0, 5}, 2, false), InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{9, 3, 10}, 2, false), - InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{5, 12, 13}, 2, true))); + InducedEgo_Usecase( + "test/datasets/karate.mtx", std::vector{5, 9, 3, 10, 12, 13}, 2, true))); +// For perf 
analysis +/* +INSTANTIATE_TEST_CASE_P( +simple_test, +Tests_InducedEgo, +::testing::Values( +InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 1, false), +InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 2, false), +InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 3, false), +InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 4, false), +InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 5, false), +InducedEgo_Usecase( +"test/datasets/soc-LiveJournal1.mtx", std::vector{363617}, 2, false), +InducedEgo_Usecase( +"test/datasets/soc-LiveJournal1.mtx", +std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755}, + 2, + false), + InducedEgo_Usecase( + "test/datasets/soc-LiveJournal1.mtx", + std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755, + 2536834, 3070144, 3888415, 3131712, 2382526, 1040771, 2631543, 4607218, 4465829, + 3341686, 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899, + 1213033, 4840102, 4529036, 3421116, 4264831, 4089751, 4272322, 3486998, 2830318, + 320953, 2388331, 520808, 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680, + 847662, 3277365, 3957318, 3455123, 2454259, 670953, 4465677, 1027332, 2560721, 89061, + 1163406, 3109528, 3221856, 4714426, 2382774, 37828, 4433616, 3283229, 591911, + 4200188, 442522, 872207, 2437601, 741003, 266241, 914618, 3626195, 2021080, + 4679624, 777476, 2527796, 1114017, 640142, 49259, 4069879, 3869098, 1105040, + 4707804, 3208582, 3325885, 1450601, 4072548, 2037062, 2029646, 4575891, 1488598, 79105, + 4827273, 3795434, 4647518, 4733397, 3980718, 1184627}, + 2, + false), + InducedEgo_Usecase( + "test/datasets/soc-LiveJournal1.mtx", + std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755, + 2536834, 3070144, 3888415, 3131712, 2382526, 1040771, 2631543, 4607218, 4465829, + 3341686, 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899, + 1213033, 4840102, 4529036, 3421116, 4264831, 4089751, 4272322, 3486998, 2830318, + 320953, 2388331, 520808, 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680, + 847662, 3277365, 3957318, 3455123, 2454259, 670953, 4465677, 1027332, 2560721, 89061, + 1163406, 3109528, 3221856, 4714426, 2382774, 37828, 4433616, 3283229, 591911, + 4200188, 442522, 872207, 2437601, 741003, 266241, 914618, 3626195, 2021080, + 4679624, 777476, 2527796, 1114017, 640142, 49259, 4069879, 3869098, 1105040, + 4707804, 3208582, 3325885, 1450601, 4072548, 2037062, 2029646, 4575891, 1488598, 79105, + 4827273, 3795434, 4647518, 4733397, 3980718, 1184627, 984983, 3114832, 1967741, + 1599818, 144593, 2698770, 2889449, 2495550, 1053813, 1193622, 686026, 3989015, + 2040719, 4693428, 3190376, 2926728, 3399030, 1664419, 662429, 4526841, 2186957, + 3752558, 2440046, 2930226, 3633006, 4058166, 3137060, 3499296, 2126343, 148971, + 2199672, 275811, 2813976, 2274536, 1189239, 1335942, 2465624, 2596042, 829684, 193400, + 2682845, 3691697, 4022437, 4051170, 4195175, 2876420, 3984220, 2174475, 326134, + 2606530, 2493046, 4706121, 1498980, 4576225, 1271339, 44832, 1875673, 4664940, + 134931, 736397, 4333554, 2751031, 2163610, 2879676, 3174153, 3317403, 2052464, + 1881883, 4757859, 3596257, 2358088, 2578758, 447504, 590720, 1717038, 1869795, + 1133885, 3027521, 840312, 2818881, 3654321, 2730947, 353585, 1134903, 2223378, + 1508824, 3662521, 1363776, 2712071, 
288441, 1204581, 3502242, 4645567, 2767267, + 1514366, 3956099, 1422145, 1216608, 2253360, 189132, 4238225, 1345783, 451571, 1599442, + 3237284, 4711405, 929446, 1857675, 150759, 1277633, 761210, 138628, 1026833, + 2599544, 2464737, 989203, 3399615, 2144292, 216142, 637312, 2044964, 716256, 1660632, + 1762919, 4784357, 2213415, 2764769, 291806, 609772, 3264819, 1870953, 1516385, + 235647, 1045474, 2664957, 819095, 1824119, 4045271, 4448109, 1676788, 4285177, + 1580502, 3546548, 2771971, 3927086, 1339779, 3156204, 1730998, 1172522, 2433024, + 4533449, 479930, 2010695, 672994, 3542039, 3176455, 26352, 2137735, 866910, + 4410835, 2623982, 3603159, 2555625, 2765653, 267865, 2015523, 1009052, 4713994, + 1600667, 2176195, 3179631, 4570390, 2018424, 3356384, 1784287, 894861, 3622099, + 1647273, 3044136, 950354, 1491760, 3416929, 3757300, 2244912, 4129215, 1600848, + 3867343, 72329, 919189, 992521, 3445975, 4712557, 4680974, 188419, 2612093, + 1991268, 3566207, 2281468, 3859078, 2492806, 3398628, 763441, 2679107, 2554420, + 2130132, 4664374, 1182901, 3890770, 4714667, 4209303, 4013060, 3617653, 2040022, + 3296519, 4190671, 1693353, 2678411, 3788834, 2781815, 191965, 1083926, 503974, 3529226, + 1650522, 1900976, 542080, 3423929, 3418905, 878165, 4701703, 3022790, 4316365, 76365, + 4053672, 1358185, 3830478, 4445661, 3210024, 1895915, 4541133, 2938808, 562788, + 3920065, 1458776, 4052046, 2967475, 1092809, 3203538, 159626, 3399464, 214467, + 3343982, 1811854, 3189045, 4272117, 4701563, 424807, 4341116, 760545, 4674683, + 1538018, 386762, 194237, 2162719, 1694433, 943728, 2389036, 2196653, 3085571, + 1513424, 3689413, 3278747, 4197291, 3324063, 3651090, 1737936, 2768803, 2768889, + 3108096, 4311775, 3569480, 886705, 733256, 2477493, 1735412, 2960895, 1983781, + 1861797, 3566460, 4537673, 1164093, 3499764, 4553071, 3518985, 847658, 918948, + 2922351, 1056144, 652895, 1013195, 780505, 1702928, 3562838, 1432719, 2405207, + 1054920, 641647, 2240939, 3617702, 383165, 652641, 879593, 1810739, 2096385, + 4497865, 4768530, 1743968, 3582014, 1025009, 3002122, 2422190, 527647, 1251821, + 2571153, 4095874, 3705333, 3637407, 1385567, 4043855, 4041930, 2433139, 1710383, + 1127734, 4362316, 711588, 817839, 3214775, 910077, 1313768, 2382229, 16864, 2081770, + 3095420, 3195272, 548711, 2259860, 1167323, 2435974, 425238, 2085179, 2630042, + 2632881, 2867923, 3703565, 1037695, 226617, 4379130, 1541468, 3581937, 605965, + 1137674, 4655221, 4769963, 1394370, 4425315, 2990132, 2364485, 1561137, 2713384, + 481509, 2900382, 934766, 2986774, 1767669, 298593, 2502539, 139296, 3794229, + 4002180, 4718138, 2909238, 423691, 3023810, 2784924, 2760160, 1971980, 316683, + 3828090, 3253691, 4839313, 1203624, 584938, 3901482, 1747543, 1572737, 3533226, + 774708, 1691195, 1037110, 1557763, 225120, 4424243, 3524086, 1717663, 4332507, + 3513592, 4274932, 1232118, 873498, 1416042, 2488925, 111391, 4704545, 4492545, + 445317, 1584812, 2187737, 2471948, 3731678, 219255, 2282627, 2589971, 2372185, + 4609096, 3673961, 2524410, 12823, 2437155, 3015974, 4188352, 3184084, 3690756, + 1222341, 1278376, 3652030, 4162647, 326548, 3930062, 3926100, 1551222, 2722165, + 4526695, 3997534, 4815513, 3139056, 2547644, 3028915, 4149092, 3656554, 2691582, + 2676699, 1878842, 260174, 3129900, 4379993, 182347, 2189338, 3783616, 2616666, + 2596952, 243007, 4179282, 2730, 1939894, 2332032, 3335636, 182332, 3112260, + 2174584, 587481, 4527368, 3154106, 3403059, 673206, 2150292, 446521, 1600204, + 4819428, 2591357, 48490, 2917012, 2285923, 1072926, 2824281, 
4364250, 956033, 311938, + 37251, 3729300, 2726300, 644966, 1623020, 1419070, 4646747, 2417222, 2680238, + 2561083, 1793801, 2349366, 339747, 611366, 4684147, 4356907, 1277161, 4510381, + 3218352, 4161658, 3200733, 1172372, 3997786, 3169266, 3353418, 2248955, 2875885, + 2365369, 498208, 2968066, 2681505, 2059048, 2097106, 3607540, 1121504, 2016789, + 1762605, 3138431, 866081, 3705757, 3833066, 2599788, 760816, 4046672, 1544367, + 2983906, 4842911, 209599, 1250954, 3333704, 561212, 4674336, 2831841, 3690724, + 2929360, 4830834, 1177524, 2487687, 3525137, 875283, 651241, 2110742, 1296646, + 1543739, 4349417, 2384725, 1931751, 1519208, 1520034, 3385008, 3219962, 734912, 170230, + 1741419, 729913, 2860117, 2362381, 1199807, 2424230, 177824, 125948, 2722701, + 4687548, 1140771, 3232742, 4522020, 4376360, 1125603, 590312, 2481884, 138951, + 4086775, 615155, 3395781, 4587272, 283209, 568470, 4296185, 4344150, 2454321, + 2672602, 838828, 4051647, 1709120, 3074610, 693235, 4356087, 3018806, 239410, + 2431497, 691186, 766276, 4462126, 859155, 2370304, 1571808, 1938673, 1694955, + 3871296, 4245059, 3987376, 301524, 2512461, 3410437, 3300380, 684922, 4581995, + 3599557, 683515, 1850634, 3704678, 1937490, 2035591, 3718533, 2065879, 3160765, + 1467884, 1912241, 2501509, 3668572, 3390469, 2501150, 612319, 713633, 1976262, 135946, + 3641535, 632083, 13414, 4217765, 4137712, 2550250, 3281035, 4179598, 961045, + 2020694, 4380006, 1345936, 289162, 1359035, 770872, 4509911, 3947317, 4719693, + 248568, 2625660, 1237232, 2153208, 4814282, 1259954, 3677369, 861222, 2883506, + 3339149, 3998335, 491017, 1609022, 2648112, 742132, 649609, 4206953, 3131106, + 3504814, 3344486, 611721, 3215620, 2856233, 4447505, 1949222, 1868345, 712710, 6966, + 4730666, 3181872, 2972889, 3038521, 3525444, 4385208, 1845613, 1124187, 2030476, + 4468651, 2478792, 3473580, 3783357, 1852991, 1648485, 871319, 1670723, 4458328, + 3218600, 1811100, 3443356, 2233873, 3035207, 2548692, 3337891, 3773674, 1552957, + 4782811, 3144712, 3523466, 1491315, 3955852, 1838410, 3164028, 1092543, 776459, + 2959379, 2541744, 4064418, 3908320, 2854145, 3960709, 1348188, 977678, 853619, + 1304291, 2848702, 1657913, 1319826, 3322665, 788037, 2913686, 4471279, 1766285, 348304, + 56570, 1892118, 4017244, 401006, 3524539, 4310134, 1624693, 4081113, 957511, 849400, + 129975, 2616130, 378537, 1556787, 3916162, 1039980, 4407778, 2027690, 4213675, + 839863, 683134, 75805, 2493150, 4215796, 81587, 751845, 1255588, 1947964, + 1950470, 859401, 3077088, 3931110, 2316256, 1523761, 4527477, 4237511, 1123513, + 4209796, 3584772, 4250563, 2091754, 1618766, 2139944, 4525352, 382159, 2955887, 41760, + 2313998, 496912, 3791570, 3904792, 3613654, 873959, 127076, 2537797, 2458107, + 4543265, 3661909, 26828, 271816, 17854, 2461269, 1776042, 1573899, 3409957, + 4335712, 4534313, 3392751, 1230124, 2159031, 4444015, 3373087, 3848014, 2026600, + 1382747, 3537242, 4536743, 4714155, 3788371, 3570849, 173741, 211962, 4377778, + 119369, 2856973, 2945854, 1508054, 4503932, 3141566, 1842177, 3448683, 3384614, + 2886508, 1573965, 990618, 3053734, 2918742, 4508753, 1032149, 60943, 4291620, + 722607, 2883224, 169359, 4356585, 3725543, 3678729, 341673, 3592828, 4077251, + 3382936, 3885685, 4630994, 1286698, 4449616, 1138430, 3113385, 4660578, 2539973, + 4562286, 4085089, 494737, 3967610, 2130702, 1823755, 1369324, 3796951, 956299, 141730, + 935144, 4381893, 4412545, 1382250, 3024476, 2364546, 3396164, 3573511, 314081, 577688, + 4154135, 1567018, 4047761, 2446220, 1148833, 4842497, 3967186, 
1175290, 3749667, + 1209593, 3295627, 3169065, 2460328, 1838486, 1436923, 2843887, 3676426, 2079145, + 2975635, 535071, 4287509, 3281107, 39606, 3115500, 3204573, 722131, 3124073}, +2, +false)));*/ CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/python/cugraph/community/egonet_wrapper.pyx b/python/cugraph/community/egonet_wrapper.pyx index 7d2a1169e25..ff9f2b8b3de 100644 --- a/python/cugraph/community/egonet_wrapper.pyx +++ b/python/cugraph/community/egonet_wrapper.pyx @@ -56,9 +56,11 @@ def egonet(input_graph, vertices, radius=1): # Pointers for egonet cdef uintptr_t c_source_vertex_ptr = vertices.__cuda_array_interface__['data'][0] n_subgraphs = vertices.size - + n_streams = 1 + if n_subgraphs > 1 : + n_streams = min(n_subgraphs, 32) cdef unique_ptr[handle_t] handle_ptr - handle_ptr.reset(new handle_t()) + handle_ptr.reset(new handle_t(n_streams)) handle_ = handle_ptr.get(); cdef graph_container_t graph_container diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index 3bb42633ac1..1e0d9626727 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -23,7 +23,6 @@ from libcpp.vector cimport vector from cugraph.raft.common.handle cimport * from rmm._lib.device_buffer cimport device_buffer - cdef extern from "graph.hpp" namespace "cugraph": ctypedef enum PropType: From 85012ca1b8c99d9fe90a2a3a516e1a62fe0324ec Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Fri, 5 Mar 2021 09:16:39 -0500 Subject: [PATCH 16/51] Update C++ MG PageRank and SG PageRank, Katz Centrality, BFS, and SSSP to use the new R-mat graph generator (#1438) - [x] Refactor cuGraph C++ test library - [x] Add a utility function to create a graph object from the R-mat generator - [x] Update C++ MG PageRank and SG PageRank, Katz Centrality, BFS, and SSSP tests to use the new R-mat graph generator This partially addresses https://github.com/rapidsai/cugraph/issues/1382 and is a per-requsite for graph primitives performance optimization. 
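For reference, the updated tests select between the two graph sources roughly as follows. This is a minimal sketch, not part of the diff; the angle-bracketed template arguments (vertex_t, edge_t, weight_t, store_transposed, multi_gpu) are assumed here because this listing elides them, and the argument order is taken from the call sites in the diff below.

  // Sketch only (not part of this patch): build the test graph either from a
  // Matrix Market file or from the R-mat generator, based on the use case tag.
  cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, false> graph(handle);
  std::tie(graph, std::ignore) =
    configuration.input_graph_specifier.tag ==
        cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH
      ? cugraph::test::read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t,
                                                          store_transposed, false>(
          handle,
          configuration.input_graph_specifier.graph_file_full_path,
          test_weighted,
          renumber)
      : cugraph::test::generate_graph_from_rmat_params<vertex_t, edge_t, weight_t,
                                                       store_transposed, false>(
          handle,
          configuration.input_graph_specifier.rmat_params.scale,
          configuration.input_graph_specifier.rmat_params.edge_factor,
          configuration.input_graph_specifier.rmat_params.a,
          configuration.input_graph_specifier.rmat_params.b,
          configuration.input_graph_specifier.rmat_params.c,
          configuration.input_graph_specifier.rmat_params.seed,
          configuration.input_graph_specifier.rmat_params.undirected,
          configuration.input_graph_specifier.rmat_params.scramble_vertex_ids,
          test_weighted,
          renumber);

The same pattern is applied to the SG PageRank, Katz Centrality, BFS, SSSP, and MG PageRank tests below, so each test can run against either a dataset file or a synthetically generated R-mat graph without further code changes.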
Authors: - Seunghwa Kang (@seunghwak) Approvers: - Andrei Schaffer (@aschaffer) - Chuck Hastings (@ChuckHastings) - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1438 --- cpp/tests/CMakeLists.txt | 5 +- cpp/tests/experimental/bfs_test.cpp | 49 +- .../experimental/katz_centrality_test.cpp | 66 ++- cpp/tests/experimental/pagerank_test.cpp | 82 ++- cpp/tests/experimental/sssp_test.cpp | 49 +- cpp/tests/pagerank/mg_pagerank_test.cpp | 113 +++- .../utilities/generate_graph_from_edgelist.cu | 526 ++++++++++++++++++ ...ies.cu => matrix_market_file_utilities.cu} | 227 +------- cpp/tests/utilities/misc_utilities.cpp | 33 ++ cpp/tests/utilities/rmat_utilities.cu | 431 ++++++++++++++ cpp/tests/utilities/test_utilities.hpp | 54 +- 11 files changed, 1337 insertions(+), 298 deletions(-) create mode 100644 cpp/tests/utilities/generate_graph_from_edgelist.cu rename cpp/tests/utilities/{test_utilities.cu => matrix_market_file_utilities.cu} (71%) create mode 100644 cpp/tests/utilities/misc_utilities.cpp create mode 100644 cpp/tests/utilities/rmat_utilities.cu diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 18dfdbc8f63..1db2f9df42e 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -20,7 +20,10 @@ # - common test utils ----------------------------------------------------------------------------- add_library(cugraphtestutil STATIC - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/generate_graph_from_edgelist.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/matrix_market_file_utilities.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/rmat_utilities.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/misc_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c") set_property(TARGET cugraphtestutil PROPERTY POSITION_INDEPENDENT_CODE ON) diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp index 5b8add98560..ad9ece99ef9 100644 --- a/cpp/tests/experimental/bfs_test.cpp +++ b/cpp/tests/experimental/bfs_test.cpp @@ -73,17 +73,26 @@ void bfs_reference(edge_t const* offsets, } typedef struct BFS_Usecase_t { - std::string graph_file_full_path{}; + cugraph::test::input_graph_specifier_t input_graph_specifier{}; size_t source{false}; BFS_Usecase_t(std::string const& graph_file_path, size_t source) : source(source) { + std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; } else { graph_file_full_path = graph_file_path; } + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; + input_graph_specifier.graph_file_full_path = graph_file_full_path; }; + + BFS_Usecase_t(cugraph::test::rmat_params_t rmat_params, size_t source) : source(source) + { + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; + input_graph_specifier.rmat_params = rmat_params; + } } BFS_Usecase; class Tests_BFS : public ::testing::TestWithParam { @@ -104,8 +113,23 @@ class Tests_BFS : public ::testing::TestWithParam { cugraph::experimental::graph_t graph(handle); std::tie(graph, std::ignore) = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, false, false); + configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? 
cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.input_graph_specifier.graph_file_full_path, false, false) + : cugraph::test::generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + false, + false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); @@ -193,13 +217,16 @@ class Tests_BFS : public ::testing::TestWithParam { // FIXME: add tests for type combinations TEST_P(Tests_BFS, CheckInt32Int32) { run_current_test(GetParam()); } -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_BFS, - ::testing::Values(BFS_Usecase("test/datasets/karate.mtx", 0), - BFS_Usecase("test/datasets/polbooks.mtx", 0), - BFS_Usecase("test/datasets/netscience.mtx", 0), - BFS_Usecase("test/datasets/netscience.mtx", 100), - BFS_Usecase("test/datasets/wiki2003.mtx", 1000), - BFS_Usecase("test/datasets/wiki-Talk.mtx", 1000))); +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_BFS, + ::testing::Values( + BFS_Usecase("test/datasets/karate.mtx", 0), + BFS_Usecase("test/datasets/polbooks.mtx", 0), + BFS_Usecase("test/datasets/netscience.mtx", 0), + BFS_Usecase("test/datasets/netscience.mtx", 100), + BFS_Usecase("test/datasets/wiki2003.mtx", 1000), + BFS_Usecase("test/datasets/wiki-Talk.mtx", 1000), + BFS_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index 945248cc4de..776bb60716c 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -89,18 +89,31 @@ void katz_centrality_reference(edge_t const* offsets, } typedef struct KatzCentrality_Usecase_t { - std::string graph_file_full_path{}; + cugraph::test::input_graph_specifier_t input_graph_specifier{}; + bool test_weighted{false}; KatzCentrality_Usecase_t(std::string const& graph_file_path, bool test_weighted) : test_weighted(test_weighted) { + std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; } else { graph_file_full_path = graph_file_path; } + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; + input_graph_specifier.graph_file_full_path = graph_file_full_path; }; + + KatzCentrality_Usecase_t(cugraph::test::rmat_params_t rmat_params, + double personalization_ratio, + bool test_weighted) + : test_weighted(test_weighted) + { + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; + input_graph_specifier.rmat_params = rmat_params; + } } KatzCentrality_Usecase; class Tests_KatzCentrality : public ::testing::TestWithParam { @@ -119,8 +132,26 @@ class Tests_KatzCentrality : public ::testing::TestWithParam graph(handle); std::tie(graph, std::ignore) = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted, false); + 
configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, + configuration.input_graph_specifier.graph_file_full_path, + configuration.test_weighted, + false) + : cugraph::test::generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + configuration.test_weighted, + false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); @@ -220,13 +251,26 @@ TEST_P(Tests_KatzCentrality, CheckInt32Int32FloatFloat) INSTANTIATE_TEST_CASE_P( simple_test, Tests_KatzCentrality, - ::testing::Values(KatzCentrality_Usecase("test/datasets/karate.mtx", false), - KatzCentrality_Usecase("test/datasets/karate.mtx", true), - KatzCentrality_Usecase("test/datasets/web-Google.mtx", false), - KatzCentrality_Usecase("test/datasets/web-Google.mtx", true), - KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", false), - KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", true), - KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", false), - KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", true))); + ::testing::Values( + KatzCentrality_Usecase("test/datasets/karate.mtx", false), + KatzCentrality_Usecase("test/datasets/karate.mtx", true), + KatzCentrality_Usecase("test/datasets/web-Google.mtx", false), + KatzCentrality_Usecase("test/datasets/web-Google.mtx", true), + KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", false), + KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", true), + KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", false), + KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", true), + KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.0, + false), + KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.5, + false), + KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.0, + true), + KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.5, + true))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 514f73e3311..ff3b073cbc7 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -124,7 +124,8 @@ void pagerank_reference(edge_t const* offsets, } typedef struct PageRank_Usecase_t { - std::string graph_file_full_path{}; + cugraph::test::input_graph_specifier_t input_graph_specifier{}; + double personalization_ratio{0.0}; bool test_weighted{false}; @@ -133,12 +134,24 @@ typedef struct PageRank_Usecase_t { bool test_weighted) : personalization_ratio(personalization_ratio), test_weighted(test_weighted) { + std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; } else { graph_file_full_path = graph_file_path; } + 
input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; + input_graph_specifier.graph_file_full_path = graph_file_full_path; }; + + PageRank_Usecase_t(cugraph::test::rmat_params_t rmat_params, + double personalization_ratio, + bool test_weighted) + : personalization_ratio(personalization_ratio), test_weighted(test_weighted) + { + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; + input_graph_specifier.rmat_params = rmat_params; + } } PageRank_Usecase; class Tests_PageRank : public ::testing::TestWithParam { @@ -157,8 +170,26 @@ class Tests_PageRank : public ::testing::TestWithParam { cugraph::experimental::graph_t graph(handle); std::tie(graph, std::ignore) = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted, false); + configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, + configuration.input_graph_specifier.graph_file_full_path, + configuration.test_weighted, + false) + : cugraph::test::generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + configuration.test_weighted, + false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); @@ -296,21 +327,34 @@ TEST_P(Tests_PageRank, CheckInt32Int32FloatFloat) INSTANTIATE_TEST_CASE_P( simple_test, Tests_PageRank, - ::testing::Values(PageRank_Usecase("test/datasets/karate.mtx", 0.0, false), - PageRank_Usecase("test/datasets/karate.mtx", 0.5, false), - PageRank_Usecase("test/datasets/karate.mtx", 0.0, true), - PageRank_Usecase("test/datasets/karate.mtx", 0.5, true), - PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, false), - PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, false), - PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, true), - PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, true), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, false), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true), - PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false), - PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false), - PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true), - PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true))); + ::testing::Values( + PageRank_Usecase("test/datasets/karate.mtx", 0.0, false), + PageRank_Usecase("test/datasets/karate.mtx", 0.5, false), + PageRank_Usecase("test/datasets/karate.mtx", 0.0, true), + PageRank_Usecase("test/datasets/karate.mtx", 0.5, true), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, false), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, false), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, true), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, true), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, 
false), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true), + PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.0, + false), + PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.5, + false), + PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.0, + true), + PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.5, + true))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp index 7fd59d49a25..611abcb0d75 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -79,17 +79,26 @@ void sssp_reference(edge_t const* offsets, } typedef struct SSSP_Usecase_t { - std::string graph_file_full_path{}; + cugraph::test::input_graph_specifier_t input_graph_specifier{}; size_t source{false}; SSSP_Usecase_t(std::string const& graph_file_path, size_t source) : source(source) { + std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; } else { graph_file_full_path = graph_file_path; } + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; + input_graph_specifier.graph_file_full_path = graph_file_full_path; }; + + SSSP_Usecase_t(cugraph::test::rmat_params_t rmat_params, size_t source) : source(source) + { + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; + input_graph_specifier.rmat_params = rmat_params; + } } SSSP_Usecase; class Tests_SSSP : public ::testing::TestWithParam { @@ -108,8 +117,23 @@ class Tests_SSSP : public ::testing::TestWithParam { cugraph::experimental::graph_t graph(handle); std::tie(graph, std::ignore) = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, true, false); + configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? 
cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.input_graph_specifier.graph_file_full_path, true, false) + : cugraph::test::generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + true, + false); auto graph_view = graph.view(); std::vector h_offsets(graph_view.get_number_of_vertices() + 1); @@ -209,16 +233,13 @@ class Tests_SSSP : public ::testing::TestWithParam { // FIXME: add tests for type combinations TEST_P(Tests_SSSP, CheckInt32Int32Float) { run_current_test(GetParam()); } -#if 0 -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_SSSP, - ::testing::Values(SSSP_Usecase("test/datasets/karate.mtx", 0))); -#else -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_SSSP, - ::testing::Values(SSSP_Usecase("test/datasets/karate.mtx", 0), - SSSP_Usecase("test/datasets/dblp.mtx", 0), - SSSP_Usecase("test/datasets/wiki2003.mtx", 1000))); -#endif +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_SSSP, + ::testing::Values( + SSSP_Usecase("test/datasets/karate.mtx", 0), + SSSP_Usecase("test/datasets/dblp.mtx", 0), + SSSP_Usecase("test/datasets/wiki2003.mtx", 1000), + SSSP_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/pagerank/mg_pagerank_test.cpp b/cpp/tests/pagerank/mg_pagerank_test.cpp index cf9f452162b..85ee9a4243e 100644 --- a/cpp/tests/pagerank/mg_pagerank_test.cpp +++ b/cpp/tests/pagerank/mg_pagerank_test.cpp @@ -28,25 +28,38 @@ #include -typedef struct Pagerank_Usecase_t { - std::string graph_file_full_path{}; +typedef struct PageRank_Usecase_t { + cugraph::test::input_graph_specifier_t input_graph_specifier{}; + double personalization_ratio{0.0}; bool test_weighted{false}; - Pagerank_Usecase_t(std::string const& graph_file_path, + PageRank_Usecase_t(std::string const& graph_file_path, double personalization_ratio, bool test_weighted) : personalization_ratio(personalization_ratio), test_weighted(test_weighted) { + std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; } else { graph_file_full_path = graph_file_path; } + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; + input_graph_specifier.graph_file_full_path = graph_file_full_path; }; -} Pagerank_Usecase; -class Tests_MGPageRank : public ::testing::TestWithParam { + PageRank_Usecase_t(cugraph::test::rmat_params_t rmat_params, + double personalization_ratio, + bool test_weighted) + : personalization_ratio(personalization_ratio), test_weighted(test_weighted) + { + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; + input_graph_specifier.rmat_params = rmat_params; + } +} PageRank_Usecase; + +class Tests_MGPageRank : public ::testing::TestWithParam { public: Tests_MGPageRank() {} static void SetupTestCase() {} @@ -57,7 +70,7 @@ class Tests_MGPageRank : public ::testing::TestWithParam { // Compare the results of running pagerank on multiple GPUs to that of a single-GPU run 
template - void run_current_test(Pagerank_Usecase const& configuration) + void run_current_test(PageRank_Usecase const& configuration) { // 1. initialize handle @@ -78,16 +91,51 @@ class Tests_MGPageRank : public ::testing::TestWithParam { cugraph::experimental::graph_t sg_graph(handle); rmm::device_uvector d_sg_renumber_map_labels(0, handle.get_stream()); std::tie(sg_graph, d_sg_renumber_map_labels) = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted, true); + configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, + configuration.input_graph_specifier.graph_file_full_path, + configuration.test_weighted, + true) + : cugraph::test::generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + configuration.test_weighted, + true); auto sg_graph_view = sg_graph.view(); cugraph::experimental::graph_t mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted, true); + configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? 
cugraph::test::read_graph_from_matrix_market_file( + handle, + configuration.input_graph_specifier.graph_file_full_path, + configuration.test_weighted, + true) + : cugraph::test::generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + configuration.test_weighted, + true); auto mg_graph_view = mg_graph.view(); @@ -276,21 +324,34 @@ TEST_P(Tests_MGPageRank, CheckInt32Int32FloatFloat) INSTANTIATE_TEST_CASE_P( simple_test, Tests_MGPageRank, - ::testing::Values(Pagerank_Usecase("test/datasets/karate.mtx", 0.0, false), - Pagerank_Usecase("test/datasets/karate.mtx", 0.5, false), - Pagerank_Usecase("test/datasets/karate.mtx", 0.0, true), - Pagerank_Usecase("test/datasets/karate.mtx", 0.5, true), - Pagerank_Usecase("test/datasets/web-Google.mtx", 0.0, false), - Pagerank_Usecase("test/datasets/web-Google.mtx", 0.5, false), - Pagerank_Usecase("test/datasets/web-Google.mtx", 0.0, true), - Pagerank_Usecase("test/datasets/web-Google.mtx", 0.5, true), - Pagerank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, false), - Pagerank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false), - Pagerank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true), - Pagerank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true), - Pagerank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false), - Pagerank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false), - Pagerank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true), - Pagerank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true))); + ::testing::Values( + PageRank_Usecase("test/datasets/karate.mtx", 0.0, false), + PageRank_Usecase("test/datasets/karate.mtx", 0.5, false), + PageRank_Usecase("test/datasets/karate.mtx", 0.0, true), + PageRank_Usecase("test/datasets/karate.mtx", 0.5, true), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, false), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, false), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, true), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, true), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, false), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true), + PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.0, + false), + PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.5, + false), + PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.0, + true), + PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0.5, + true))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/generate_graph_from_edgelist.cu b/cpp/tests/utilities/generate_graph_from_edgelist.cu new 
file mode 100644 index 00000000000..1b9fe6051f7 --- /dev/null +++ b/cpp/tests/utilities/generate_graph_from_edgelist.cu @@ -0,0 +1,526 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +#include +#include +#include + +#include + +#include + +#include + +namespace cugraph { +namespace test { + +namespace detail { + +template +std::enable_if_t< + multi_gpu, + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector>> +generate_graph_from_edgelist(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber) +{ + CUGRAPH_EXPECTS(renumber, "renumber should be true if multi_gpu is true."); + + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + + vertex_t number_of_vertices = static_cast(vertices.size()); + + auto vertex_key_func = + cugraph::experimental::detail::compute_gpu_id_from_vertex_t{comm_size}; + vertices.resize(thrust::distance(vertices.begin(), + thrust::remove_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices.begin(), + vertices.end(), + [comm_rank, key_func = vertex_key_func] __device__(auto val) { + return key_func(val) != comm_rank; + })), + handle.get_stream()); + vertices.shrink_to_fit(handle.get_stream()); + + auto edge_key_func = cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + false, comm_size, row_comm_size, col_comm_size}; + size_t number_of_local_edges{}; + if (test_weighted) { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin(), edgelist_weights.begin())); + number_of_local_edges = thrust::distance( + edge_first, + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + edgelist_rows.size(), + [comm_rank, key_func = edge_key_func] __device__(auto e) { + auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); + auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); + return key_func(major, minor) != comm_rank; + })); + } else { + auto edge_first = + thrust::make_zip_iterator(thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin())); + number_of_local_edges = thrust::distance( + edge_first, + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + edgelist_rows.size(), + [comm_rank, key_func = edge_key_func] __device__(auto e) { + auto major = store_transposed ? 
thrust::get<1>(e) : thrust::get<0>(e); + auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); + return key_func(major, minor) != comm_rank; + })); + } + + edgelist_rows.resize(number_of_local_edges, handle.get_stream()); + edgelist_rows.shrink_to_fit(handle.get_stream()); + edgelist_cols.resize(number_of_local_edges, handle.get_stream()); + edgelist_cols.shrink_to_fit(handle.get_stream()); + if (test_weighted) { + edgelist_weights.resize(number_of_local_edges, handle.get_stream()); + edgelist_weights.shrink_to_fit(handle.get_stream()); + } + + // 3. renumber + + rmm::device_uvector renumber_map_labels(0, handle.get_stream()); + cugraph::experimental::partition_t partition{}; + vertex_t aggregate_number_of_vertices{}; + edge_t number_of_edges{}; + // FIXME: set do_expensive_check to false once validated + std::tie(renumber_map_labels, partition, aggregate_number_of_vertices, number_of_edges) = + cugraph::experimental::renumber_edgelist( + handle, + vertices.data(), + static_cast(vertices.size()), + store_transposed ? edgelist_cols.data() : edgelist_rows.data(), + store_transposed ? edgelist_rows.data() : edgelist_cols.data(), + edgelist_rows.size(), + false, + true); + assert(aggregate_number_of_vertices == number_of_vertices); + + // 4. create a graph + + return std::make_tuple( + cugraph::experimental::graph_t( + handle, + std::vector>{ + cugraph::experimental::edgelist_t{ + edgelist_rows.data(), + edgelist_cols.data(), + test_weighted ? edgelist_weights.data() : nullptr, + static_cast(edgelist_rows.size())}}, + partition, + number_of_vertices, + number_of_edges, + cugraph::experimental::graph_properties_t{is_symmetric, false}, + true, + true), + std::move(renumber_map_labels)); +} + +template +std::enable_if_t< + !multi_gpu, + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector>> +generate_graph_from_edgelist(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber) +{ + vertex_t number_of_vertices = static_cast(vertices.size()); + + // FIXME: set do_expensive_check to false once validated + auto renumber_map_labels = + renumber ? cugraph::experimental::renumber_edgelist( + handle, + vertices.data(), + static_cast(vertices.size()), + store_transposed ? edgelist_cols.data() : edgelist_rows.data(), + store_transposed ? edgelist_rows.data() : edgelist_cols.data(), + static_cast(edgelist_rows.size()), + true) + : rmm::device_uvector(0, handle.get_stream()); + + // FIXME: set do_expensive_check to false once validated + return std::make_tuple( + cugraph::experimental::graph_t( + handle, + cugraph::experimental::edgelist_t{ + edgelist_rows.data(), + edgelist_cols.data(), + test_weighted ? edgelist_weights.data() : nullptr, + static_cast(edgelist_rows.size())}, + number_of_vertices, + cugraph::experimental::graph_properties_t{is_symmetric, false}, + renumber ? 
true : false, + true), + std::move(renumber_map_labels)); +} + +} // namespace detail + +template +std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber) +{ + return detail:: + generate_graph_from_edgelist( + handle, + std::move(vertices), + std::move(edgelist_rows), + std::move(edgelist_cols), + std::move(edgelist_weights), + is_symmetric, + test_weighted, + renumber); +} + +// explicit instantiations + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool 
test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool 
is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.cu b/cpp/tests/utilities/matrix_market_file_utilities.cu similarity index 71% rename from cpp/tests/utilities/test_utilities.cu rename to cpp/tests/utilities/matrix_market_file_utilities.cu index 0a7b58b32cd..ddbbac603ee 100644 --- a/cpp/tests/utilities/test_utilities.cu +++ b/cpp/tests/utilities/matrix_market_file_utilities.cu @@ -15,45 +15,19 @@ */ #include -#include -#include -#include #include -#include #include #include #include -#include -#include -#include -#include #include -extern "C" { -#include "mmio.h" -} - -#include -#include -#include -#include +#include namespace cugraph { namespace test { -std::string getFileName(const std::string& s) -{ - char sep = '/'; -#ifdef _WIN32 - sep = '\\'; -#endif - size_t i = s.rfind(sep, s.length()); - if (i != std::string::npos) { return (s.substr(i + 1, s.length() - i)); } - return (""); -} - /// Read matrix properties from Matrix Market file /** Matrix Market file is assumed to be a sparse matrix in coordinate * format. @@ -339,155 +313,13 @@ read_edgelist_from_matrix_market_file(raft::handle_t const& handle, is_symmetric); } -namespace detail { - template -std::enable_if_t< - multi_gpu, - std::tuple< - cugraph::experimental::graph_t, - rmm::device_uvector>> -read_graph_from_matrix_market_file(raft::handle_t const& handle, - std::string const& graph_file_full_path, - bool test_weighted, - bool renumber) -{ - CUGRAPH_EXPECTS(renumber, "renumber should be true if multi_gpu is true."); - - // 1. 
read from the matrix market file - - rmm::device_uvector d_edgelist_rows(0, handle.get_stream()); - rmm::device_uvector d_edgelist_cols(0, handle.get_stream()); - rmm::device_uvector d_edgelist_weights(0, handle.get_stream()); - vertex_t number_of_vertices{}; - bool is_symmetric{}; - std::tie(d_edgelist_rows, d_edgelist_cols, d_edgelist_weights, number_of_vertices, is_symmetric) = - read_edgelist_from_matrix_market_file( - handle, graph_file_full_path, test_weighted); - - rmm::device_uvector d_vertices(number_of_vertices, handle.get_stream()); - thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - d_vertices.begin(), - d_vertices.end(), - vertex_t{0}); - - // 2. filter non-local vertices & edges - - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto const comm_rank = comm.get_rank(); - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_size = col_comm.get_size(); - - auto vertex_key_func = - cugraph::experimental::detail::compute_gpu_id_from_vertex_t{comm_size}; - d_vertices.resize( - thrust::distance( - d_vertices.begin(), - thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - d_vertices.begin(), - d_vertices.end(), - [comm_rank, key_func = vertex_key_func] __device__(auto val) { - return key_func(val) != comm_rank; - })), - handle.get_stream()); - d_vertices.shrink_to_fit(handle.get_stream()); - - auto edge_key_func = cugraph::experimental::detail::compute_gpu_id_from_edge_t{ - false, comm_size, row_comm_size, col_comm_size}; - size_t number_of_local_edges{}; - if (test_weighted) { - auto edge_first = thrust::make_zip_iterator(thrust::make_tuple( - d_edgelist_rows.begin(), d_edgelist_cols.begin(), d_edgelist_weights.begin())); - number_of_local_edges = thrust::distance( - edge_first, - thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edge_first, - edge_first + d_edgelist_rows.size(), - [comm_rank, key_func = edge_key_func] __device__(auto e) { - auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); - auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); - return key_func(major, minor) != comm_rank; - })); - } else { - auto edge_first = thrust::make_zip_iterator( - thrust::make_tuple(d_edgelist_rows.begin(), d_edgelist_cols.begin())); - number_of_local_edges = thrust::distance( - edge_first, - thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edge_first, - edge_first + d_edgelist_rows.size(), - [comm_rank, key_func = edge_key_func] __device__(auto e) { - auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); - auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); - return key_func(major, minor) != comm_rank; - })); - } - - d_edgelist_rows.resize(number_of_local_edges, handle.get_stream()); - d_edgelist_rows.shrink_to_fit(handle.get_stream()); - d_edgelist_cols.resize(number_of_local_edges, handle.get_stream()); - d_edgelist_cols.shrink_to_fit(handle.get_stream()); - if (test_weighted) { - d_edgelist_weights.resize(number_of_local_edges, handle.get_stream()); - d_edgelist_weights.shrink_to_fit(handle.get_stream()); - } - - // 3. 
renumber - - rmm::device_uvector renumber_map_labels(0, handle.get_stream()); - cugraph::experimental::partition_t partition{}; - vertex_t aggregate_number_of_vertices{}; - edge_t number_of_edges{}; - // FIXME: set do_expensive_check to false once validated - std::tie(renumber_map_labels, partition, aggregate_number_of_vertices, number_of_edges) = - cugraph::experimental::renumber_edgelist( - handle, - d_vertices.data(), - static_cast(d_vertices.size()), - store_transposed ? d_edgelist_cols.data() : d_edgelist_rows.data(), - store_transposed ? d_edgelist_rows.data() : d_edgelist_cols.data(), - d_edgelist_rows.size(), - false, - true); - assert(aggregate_number_of_vertices == number_of_vertices); - - // 4. create a graph - - return std::make_tuple( - cugraph::experimental::graph_t( - handle, - std::vector>{ - cugraph::experimental::edgelist_t{ - d_edgelist_rows.data(), - d_edgelist_cols.data(), - test_weighted ? d_edgelist_weights.data() : nullptr, - static_cast(d_edgelist_rows.size())}}, - partition, - number_of_vertices, - number_of_edges, - cugraph::experimental::graph_properties_t{is_symmetric, false}, - true, - true), - std::move(renumber_map_labels)); -} - -template -std::enable_if_t< - !multi_gpu, - std::tuple< - cugraph::experimental::graph_t, - rmm::device_uvector>> +std::tuple, + rmm::device_uvector> read_graph_from_matrix_market_file(raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted, @@ -508,52 +340,17 @@ read_graph_from_matrix_market_file(raft::handle_t const& handle, d_vertices.end(), vertex_t{0}); - // FIXME: set do_expensive_check to false once validated - auto renumber_map_labels = - renumber ? cugraph::experimental::renumber_edgelist( - handle, - d_vertices.data(), - static_cast(d_vertices.size()), - store_transposed ? d_edgelist_cols.data() : d_edgelist_rows.data(), - store_transposed ? d_edgelist_rows.data() : d_edgelist_cols.data(), - static_cast(d_edgelist_rows.size()), - true) - : rmm::device_uvector(0, handle.get_stream()); - - // FIXME: set do_expensive_check to false once validated - return std::make_tuple( - cugraph::experimental::graph_t( - handle, - cugraph::experimental::edgelist_t{ - d_edgelist_rows.data(), - d_edgelist_cols.data(), - test_weighted ? d_edgelist_weights.data() : nullptr, - static_cast(d_edgelist_rows.size())}, - number_of_vertices, - cugraph::experimental::graph_properties_t{is_symmetric, false}, - renumber ? true : false, - true), - std::move(renumber_map_labels)); + return generate_graph_from_edgelist( + handle, + std::move(d_vertices), + std::move(d_edgelist_rows), + std::move(d_edgelist_cols), + std::move(d_edgelist_weights), + is_symmetric, + test_weighted, + renumber); } -} // namespace detail - -template -std::tuple, - rmm::device_uvector> -read_graph_from_matrix_market_file(raft::handle_t const& handle, - std::string const& graph_file_full_path, - bool test_weighted, - bool renumber) -{ - return detail:: - read_graph_from_matrix_market_file( - handle, graph_file_full_path, test_weighted, renumber); -} // explicit instantiations template int32_t mm_to_coo(FILE* f, diff --git a/cpp/tests/utilities/misc_utilities.cpp b/cpp/tests/utilities/misc_utilities.cpp new file mode 100644 index 00000000000..14f0df2f35d --- /dev/null +++ b/cpp/tests/utilities/misc_utilities.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +namespace cugraph { +namespace test { + +std::string getFileName(const std::string& s) +{ + char sep = '/'; +#ifdef _WIN32 + sep = '\\'; +#endif + size_t i = s.rfind(sep, s.length()); + if (i != std::string::npos) { return (s.substr(i + 1, s.length() - i)); } + return (""); +} + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/rmat_utilities.cu b/cpp/tests/utilities/rmat_utilities.cu new file mode 100644 index 00000000000..16ea7a486fc --- /dev/null +++ b/cpp/tests/utilities/rmat_utilities.cu @@ -0,0 +1,431 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +#include +#include + +#include +#include + +#include + +#include + +namespace cugraph { +namespace test { + +template +std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber) +{ + rmm::device_uvector d_edgelist_rows(0, handle.get_stream()); + rmm::device_uvector d_edgelist_cols(0, handle.get_stream()); + std::tie(d_edgelist_rows, d_edgelist_cols) = + cugraph::experimental::generate_rmat_edgelist( + handle, scale, edge_factor, a, b, c, seed, undirected ? true : false, scramble_vertex_ids); + if (undirected) { + // FIXME: need to symmetrize + CUGRAPH_FAIL("unimplemented."); + } + + rmm::device_uvector d_edgelist_weights(test_weighted ? 
d_edgelist_rows.size() : 0, + handle.get_stream()); + if (test_weighted) { + raft::random::Rng rng(seed + 1); + rng.uniform(d_edgelist_weights.data(), + d_edgelist_weights.size(), + weight_t{0.0}, + weight_t{1.0}, + handle.get_stream()); + } + + rmm::device_uvector d_vertices(static_cast(size_t{1} << scale), + handle.get_stream()); + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_vertices.begin(), + d_vertices.end(), + vertex_t{0}); + + return generate_graph_from_edgelist( + handle, + std::move(d_vertices), + std::move(d_edgelist_rows), + std::move(d_edgelist_cols), + std::move(d_edgelist_weights), + false, + test_weighted, + renumber); +} + +// explicit instantiations + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool 
scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + 
rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +template std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 4682699df2d..37e87c62247 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -19,8 +19,8 @@ #include #include +#include -#include #include #include @@ -130,6 +130,58 @@ read_graph_from_matrix_market_file(raft::handle_t const& handle, bool test_weighted, bool renumber); +template +std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + +template +std::tuple, + rmm::device_uvector> +generate_graph_from_rmat_params(raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber); + +struct rmat_params_t { + size_t scale{}; + size_t edge_factor{}; + double a{}; + double b{}; + double c{}; + uint64_t seed{}; + bool undirected{}; + bool scramble_vertex_ids{}; +}; + +struct input_graph_specifier_t { + enum { MATRIX_MARKET_FILE_PATH, RMAT_PARAMS } tag{}; + std::string graph_file_full_path{}; + rmat_params_t rmat_params{}; +}; + template std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, vertex_t v) From 128abf0ca220d001d8d6ceb64d2a1ad8b0951fb2 Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Tue, 9 Mar 2021 08:32:57 -0500 Subject: [PATCH 17/51] Update FAISS to 1.7.0 (#1444) upgraded FAISS to version 1.7 closes #1440 Authors: - Brad Rees (@BradReesWork) - Rick Ratzel (@rlratzel) Approvers: - Alex Fender (@afender) - AJ Schmidt (@ajschmidt8) - Rick Ratzel (@rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1444 --- conda/environments/cugraph_dev_cuda10.1.yml | 2 +- conda/environments/cugraph_dev_cuda10.2.yml | 2 +- conda/environments/cugraph_dev_cuda11.0.yml | 2 +- conda/recipes/libcugraph/meta.yaml | 4 +-- cpp/CMakeLists.txt | 39 +++++++++++---------- 5 files changed, 25 insertions(+), 24 deletions(-) diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 255366b0a82..1e5a4609d1a 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -29,7 +29,7 @@ dependencies: - boost - cython>=0.29,<0.30 - pytest -- libfaiss=1.6.3 +- libfaiss=1.7.0 - faiss-proc=*=cuda - scikit-learn>=0.23.1 - colorcet diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index e64d7c77b7d..ddc66126257 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -29,7 +29,7 @@ dependencies: - boost - cython>=0.29,<0.30 - pytest -- 
libfaiss=1.6.3 +- libfaiss=1.7.0 - faiss-proc=*=cuda - scikit-learn>=0.23.1 - colorcet diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index 1f05e4762ef..eed08f146ee 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -29,7 +29,7 @@ dependencies: - boost - cython>=0.29,<0.30 - pytest -- libfaiss=1.6.3 +- libfaiss=1.7.0 - faiss-proc=*=cuda - scikit-learn>=0.23.1 - colorcet diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 8f7495eab3c..bd0dde28af9 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -41,7 +41,7 @@ requirements: - ucx-proc=*=gpu - gtest - faiss-proc=*=cuda - - libfaiss=1.6.3 + - conda-forge::libfaiss=1.7.0 - gmock run: - libcudf={{ minor_version }} @@ -50,7 +50,7 @@ requirements: - ucx-py {{ minor_version }} - ucx-proc=*=gpu - faiss-proc=*=cuda - - libfaiss=1.6.3 + - conda-forge::libfaiss=1.7.0 #test: # commands: diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b0365c3cfd6..26a8f98e265 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -297,7 +297,7 @@ else(DEFINED ENV{RAFT_PATH}) FetchContent_Declare( raft GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG b055cf862a599fd45537d21a309edd8a6e06da4c + GIT_TAG 6455e05b3889db2b495cf3189b33c2b07bfbebf2 SOURCE_SUBDIR raft ) @@ -317,9 +317,9 @@ endif(DEFINED ENV{RAFT_PATH}) # https://cmake.org/cmake/help/v3.0/module/ExternalProject.html -# FIXME: gunrock is the only external package still using ExternalProject -# instead of FetchContent. Consider migrating to FetchContent soon (this may -# require updates to the gunrock cmake files to support this). +# FIXME: gunrock is still using ExternalProject instead of +# FetchContent. Consider migrating to FetchContent soon (this may require +# updates to the gunrock cmake files to support this). include(ExternalProject) @@ -360,31 +360,32 @@ if(BUILD_STATIC_FAISS) "Path to FAISS source directory") ExternalProject_Add(faiss GIT_REPOSITORY https://github.com/facebookresearch/faiss.git - GIT_TAG a5b850dec6f1cd6c88ab467bfd5e87b0cac2e41d + GIT_TAG 7c2d2388a492d65fdda934c7e74ae87acaeed066 CONFIGURE_COMMAND LIBS=-pthread CPPFLAGS=-w LDFLAGS=-L${CMAKE_INSTALL_PREFIX}/lib - ${CMAKE_CURRENT_BINARY_DIR}/faiss/src/faiss/configure - --prefix=${CMAKE_CURRENT_BINARY_DIR}/faiss - --with-blas=${BLAS_LIBRARIES} - --with-cuda=${CUDA_TOOLKIT_ROOT_DIR} - --with-cuda-arch=${FAISS_GPU_ARCHS} - -v + cmake -B build . 
+ -DCMAKE_BUILD_TYPE=Release + -DBUILD_TESTING=OFF + -DFAISS_ENABLE_PYTHON=OFF + -DBUILD_SHARED_LIBS=OFF + -DFAISS_ENABLE_GPU=ON + -DCUDAToolkit_ROOT=${CUDA_TOOLKIT_ROOT_DIR} + -DCUDA_ARCHITECTURES=${FAISS_GPU_ARCHS} + -DBLAS_LIBRARIES=${BLAS_LIBRARIES} PREFIX ${FAISS_DIR} - BUILD_COMMAND make -j${PARALLEL_LEVEL} VERBOSE=1 - BUILD_BYPRODUCTS ${FAISS_DIR}/lib/libfaiss.a + BUILD_COMMAND make -C build -j${PARALLEL_LEVEL} VERBOSE=1 + BUILD_BYPRODUCTS ${FAISS_DIR}/src/faiss/build/faiss/libfaiss.a BUILD_ALWAYS 1 - INSTALL_COMMAND make -s install > /dev/null + INSTALL_COMMAND "" UPDATE_COMMAND "" - BUILD_IN_SOURCE 1 - PATCH_COMMAND patch -p1 -N < ${CMAKE_CURRENT_SOURCE_DIR}/cmake/faiss_cuda11.patch || true) + BUILD_IN_SOURCE 1) ExternalProject_Get_Property(faiss install_dir) add_library(FAISS::FAISS STATIC IMPORTED) - add_dependencies(FAISS::FAISS faiss) set_property(TARGET FAISS::FAISS PROPERTY - IMPORTED_LOCATION ${FAISS_DIR}/lib/libfaiss.a) - set(FAISS_INCLUDE_DIRS "${FAISS_DIR}/src") + IMPORTED_LOCATION ${FAISS_DIR}/src/faiss/build/faiss/libfaiss.a) + set(FAISS_INCLUDE_DIRS "${FAISS_DIR}/src/faiss") else() set(FAISS_INSTALL_DIR ENV{FAISS_ROOT}) find_package(FAISS REQUIRED) From 6096b60cd453bae05680d84a36e5897fb71d80be Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Wed, 10 Mar 2021 09:10:05 -0500 Subject: [PATCH 18/51] Updated NCCL to version 2.8.4 (#1445) update the environment to use NCCL 2.8.4 Authors: - Brad Rees (@BradReesWork) Approvers: - AJ Schmidt (@ajschmidt8) - @seunghwakang - Seunghwa Kang (@seunghwak) - Rick Ratzel (@rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1445 --- conda/environments/cugraph_dev_cuda10.1.yml | 2 +- conda/environments/cugraph_dev_cuda10.2.yml | 2 +- conda/environments/cugraph_dev_cuda11.0.yml | 2 +- conda/recipes/cugraph/meta.yaml | 4 ++-- conda/recipes/libcugraph/meta.yaml | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 1e5a4609d1a..f26c3dd45d9 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -14,7 +14,7 @@ dependencies: - distributed>=2.12.0 - dask-cuda=0.19* - dask-cudf=0.19* -- nccl>=2.7 +- nccl>=2.8.4 - ucx-py=0.19* - ucx-proc=*=gpu - scipy diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index ddc66126257..2848cc49dc7 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -14,7 +14,7 @@ dependencies: - distributed>=2.12.0 - dask-cuda=0.19* - dask-cudf=0.19* -- nccl>=2.7 +- nccl>=2.8.4 - ucx-py=0.19* - ucx-proc=*=gpu - scipy diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index eed08f146ee..82e8b409d13 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -14,7 +14,7 @@ dependencies: - distributed>=2.12.0 - dask-cuda=0.19* - dask-cudf=0.19* -- nccl>=2.7 +- nccl>=2.8.4 - ucx-py=0.19* - ucx-proc=*=gpu - scipy diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index 90f5bed942a..e714b61d774 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. # Usage: # conda build -c nvidia -c rapidsai -c conda-forge -c defaults . 
@@ -37,7 +37,7 @@ requirements: - dask-cuda {{ minor_version }} - dask>=2.12.0 - distributed>=2.12.0 - - nccl>=2.7 + - nccl>=2.8.4 - ucx-py {{ minor_version }} - ucx-proc=*=gpu diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index bd0dde28af9..bb5e4b468a5 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -36,7 +36,7 @@ requirements: - cudatoolkit {{ cuda_version }}.* - boost-cpp>=1.66 - libcypher-parser - - nccl>=2.7 + - nccl>=2.8.4 - ucx-py {{ minor_version }} - ucx-proc=*=gpu - gtest @@ -46,7 +46,7 @@ requirements: run: - libcudf={{ minor_version }} - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} - - nccl>=2.7 + - nccl>=2.8.4 - ucx-py {{ minor_version }} - ucx-proc=*=gpu - faiss-proc=*=cuda From 4535396403fa20b323f1323d92a58d6d2445f5aa Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Wed, 10 Mar 2021 11:18:02 -0500 Subject: [PATCH 19/51] Update Changelog Link (#1446) The tag used for pre-releases was recently changed, so this PR updates the link in the changelog. Authors: - AJ Schmidt (@ajschmidt8) Approvers: - Jordan Jacobelli (@Ethyling) URL: https://github.com/rapidsai/cugraph/pull/1446 --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe8e09f1e52..0011b99fbf3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # cuGraph 0.19.0 (Date TBD) -Please see https://github.com/rapidsai/cugraph/releases/tag/branch-0.19-latest for the latest changes to this development branch. +Please see https://github.com/rapidsai/cugraph/releases/tag/v0.19.0a for the latest changes to this development branch. # cuGraph 0.18.0 (24 Feb 2021) From 5e3551873339ce9544c7f55f7e77dec150311cdd Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Mon, 15 Mar 2021 11:21:24 -0500 Subject: [PATCH 20/51] Update and Test Renumber bindings (#1427) Authors: - @Iroy30 Approvers: - Seunghwa Kang (@seunghwak) - Andrei Schaffer (@aschaffer) URL: https://github.com/rapidsai/cugraph/pull/1427 --- cpp/include/utilities/cython.hpp | 9 +- cpp/src/utilities/cython.cu | 59 ++-- python/cugraph/dask/common/part_utils.py | 6 +- python/cugraph/dask/structure/renumber.py | 71 ---- python/cugraph/structure/new_number_map.py | 317 ++++++++++++++++++ .../{dask => }/structure/renumber_wrapper.pyx | 245 ++++++++------ 6 files changed, 508 insertions(+), 199 deletions(-) delete mode 100644 python/cugraph/dask/structure/renumber.py create mode 100644 python/cugraph/structure/new_number_map.py rename python/cugraph/{dask => }/structure/renumber_wrapper.pyx (73%) diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index e94190897b8..98e850abbf0 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -190,10 +190,7 @@ struct major_minor_weights_t { // template struct renum_quad_t { - explicit renum_quad_t(raft::handle_t const& handle) - : dv_(0, handle.get_stream()), part_(std::vector(), false, 0, 0, 0, 0) - { - } + explicit renum_quad_t(raft::handle_t const& handle) : dv_(0, handle.get_stream()), part_() {} rmm::device_uvector& get_dv(void) { return dv_; } @@ -298,8 +295,8 @@ struct renum_quad_t { private: rmm::device_uvector dv_; cugraph::experimental::partition_t part_; - vertex_t nv_; - edge_t ne_; + vertex_t nv_{0}; + edge_t ne_{0}; }; // FIXME: finish description for vertex_partition_offsets // diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index e95a001cb91..5382b4856f3 
100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -762,28 +762,49 @@ std::unique_ptr> call_shuffle( auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto zip_edge = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights)); - std::unique_ptr> ptr_ret = std::make_unique>(handle); - std::forward_as_tuple( - std::tie(ptr_ret->get_major(), ptr_ret->get_minor(), ptr_ret->get_weights()), - std::ignore) = - cugraph::experimental::groupby_gpuid_and_shuffle_values( - comm, // handle.get_comms(), - zip_edge, - zip_edge + num_edgelist_edges, - [key_func = - cugraph::experimental::detail::compute_gpu_id_from_edge_t{ - is_hypergraph_partitioned, - comm.get_size(), - row_comm.get_size(), - col_comm.get_size()}] __device__(auto val) { - return key_func(thrust::get<0>(val), thrust::get<1>(val)); - }, - handle.get_stream()); + if (edgelist_weights != nullptr) { + auto zip_edge = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights)); + + std::forward_as_tuple( + std::tie(ptr_ret->get_major(), ptr_ret->get_minor(), ptr_ret->get_weights()), + std::ignore) = + cugraph::experimental::groupby_gpuid_and_shuffle_values( + comm, // handle.get_comms(), + zip_edge, + zip_edge + num_edgelist_edges, + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + is_hypergraph_partitioned, + comm.get_size(), + row_comm.get_size(), + col_comm.get_size()}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } else { + auto zip_edge = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices)); + + std::forward_as_tuple(std::tie(ptr_ret->get_major(), ptr_ret->get_minor()), + std::ignore) = + cugraph::experimental::groupby_gpuid_and_shuffle_values( + comm, // handle.get_comms(), + zip_edge, + zip_edge + num_edgelist_edges, + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + is_hypergraph_partitioned, + comm.get_size(), + row_comm.get_size(), + col_comm.get_size()}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } return ptr_ret; // RVO-ed } diff --git a/python/cugraph/dask/common/part_utils.py b/python/cugraph/dask/common/part_utils.py index 505272fa563..ac0ff6a9a43 100644 --- a/python/cugraph/dask/common/part_utils.py +++ b/python/cugraph/dask/common/part_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -83,7 +83,9 @@ async def _extract_partitions(dask_obj, client=None): client = default_client() if client is None else client # dask.dataframe or dask.array if isinstance(dask_obj, (daskDataFrame, daskArray, daskSeries)): - parts = persist_distributed_data(dask_obj, client) + # parts = persist_distributed_data(dask_obj, client) + persisted = client.persist(dask_obj) + parts = futures_of(persisted) # iterable of dask collections (need to colocate them) elif isinstance(dask_obj, collections.Sequence): # NOTE: We colocate (X, y) here by zipping delayed diff --git a/python/cugraph/dask/structure/renumber.py b/python/cugraph/dask/structure/renumber.py deleted file mode 100644 index 606a6bc4dc1..00000000000 --- a/python/cugraph/dask/structure/renumber.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import get_distributed_data -from cugraph.dask.structure import renumber_wrapper as renumber_w -import cugraph.comms.comms as Comms -import dask_cudf - - -def call_renumber(sID, - data, - num_verts, - num_edges, - is_mnmg): - wid = Comms.get_worker_id(sID) - handle = Comms.get_handle(sID) - return renumber_w.mg_renumber(data[0], - num_verts, - num_edges, - wid, - handle, - is_mnmg) - - -def renumber(input_graph): - - client = default_client() - - ddf = input_graph.edgelist.edgelist_df - - num_edges = len(ddf) - - if isinstance(ddf, dask_cudf.DataFrame): - is_mnmg = True - else: - is_mnmg = False - - num_verts = input_graph.number_of_vertices() - - if is_mnmg: - data = get_distributed_data(ddf) - result = [client.submit(call_renumber, - Comms.get_session_id(), - wf[1], - num_verts, - num_edges, - is_mnmg, - workers=[wf[0]]) - for idx, wf in enumerate(data.worker_to_parts.items())] - wait(result) - ddf = dask_cudf.from_delayed(result) - else: - call_renumber(Comms.get_session_id(), - ddf, - num_verts, - num_edges, - is_mnmg) - return ddf diff --git a/python/cugraph/structure/new_number_map.py b/python/cugraph/structure/new_number_map.py new file mode 100644 index 00000000000..f8a2164d2c4 --- /dev/null +++ b/python/cugraph/structure/new_number_map.py @@ -0,0 +1,317 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from dask.distributed import wait, default_client +from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.structure import renumber_wrapper as c_renumber +import cugraph.comms as Comms +import dask_cudf +import numpy as np +import cudf +import cugraph.structure.number_map as legacy_number_map + + +def call_renumber(sID, + data, + num_edges, + is_mnmg, + store_transposed): + wid = Comms.get_worker_id(sID) + handle = Comms.get_handle(sID) + return c_renumber.renumber(data[0], + num_edges, + wid, + handle, + is_mnmg, + store_transposed) + + +class NumberMap: + + class SingleGPU: + def __init__(self, df, src_col_names, dst_col_names, id_type, + store_transposed): + self.col_names = NumberMap.compute_vals(src_col_names) + self.df = cudf.DataFrame() + self.id_type = id_type + self.store_transposed = store_transposed + self.numbered = False + + def to_internal_vertex_id(self, df, col_names): + tmp_df = df[col_names].rename( + columns=dict(zip(col_names, self.col_names)), copy=False + ) + index_name = NumberMap.generate_unused_column_name(df.columns) + tmp_df[index_name] = tmp_df.index + return ( + self.df.merge(tmp_df, on=self.col_names, how="right") + .sort_values(index_name) + .drop(columns=[index_name]) + .reset_index()["id"] + ) + + def from_internal_vertex_id( + self, df, internal_column_name, external_column_names + ): + tmp_df = self.df.merge( + df, + right_on=internal_column_name, + left_on="id", + how="right", + ) + if internal_column_name != "id": + tmp_df = tmp_df.drop(columns=["id"]) + if external_column_names is None: + return tmp_df + else: + return tmp_df.rename( + columns=dict(zip(self.col_names, external_column_names)), + copy=False, + ) + + class MultiGPU: + def __init__( + self, ddf, src_col_names, dst_col_names, id_type, store_transposed + ): + self.col_names = NumberMap.compute_vals(src_col_names) + self.val_types = NumberMap.compute_vals_types(ddf, src_col_names) + self.val_types["count"] = np.int32 + self.id_type = id_type + self.store_transposed = store_transposed + self.numbered = False + + def to_internal_vertex_id(self, ddf, col_names): + return self.ddf.merge( + ddf, + right_on=col_names, + left_on=self.col_names, + how="right", + )["global_id"] + + def from_internal_vertex_id( + self, df, internal_column_name, external_column_names + ): + tmp_df = self.ddf.merge( + df, + right_on=internal_column_name, + left_on="global_id", + how="right" + ).map_partitions(lambda df: df.drop(columns="global_id")) + + if external_column_names is None: + return tmp_df + else: + return tmp_df.map_partitions( + lambda df: + df.rename( + columns=dict( + zip(self.col_names, external_column_names) + ), + copy=False + ) + ) + + def __init__(self, id_type=np.int32): + self.implementation = None + self.id_type = id_type + + def compute_vals_types(df, column_names): + """ + Helper function to compute internal column names and types + """ + return { + str(i): df[column_names[i]].dtype for i in range(len(column_names)) + } + + def generate_unused_column_name(column_names): + """ + Helper function to generate an unused column name + """ + name = 'x' + while name in column_names: + name = name + "x" + + return name + + def compute_vals(column_names): + """ + Helper function to compute internal column names based on external + column names + """ + return [str(i) for i in range(len(column_names))] + + def renumber(df, src_col_names, dst_col_names, preserve_order=False, + store_transposed=False): + + if isinstance(src_col_names, list): + renumber_type = 'legacy' + # elif 
isinstance(df[src_col_names].dtype, string): + # renumber_type = 'legacy' + else: + renumber_type = 'experimental' + + if renumber_type == 'legacy': + renumber_map, renumbered_df = legacy_number_map.renumber( + df, + src_col_names, + dst_col_names, + preserve_order, + store_transposed) + # Add shuffling once algorithms are switched to new renumber + # (ddf, + # num_verts, + # partition_row_size, + # partition_col_size, + # vertex_partition_offsets) = shuffle(input_graph, transposed=True) + return renumber_map, renumbered_df + + renumber_map = NumberMap() + if not isinstance(src_col_names, list): + src_col_names = [src_col_names] + dst_col_names = [dst_col_names] + if type(df) is cudf.DataFrame: + renumber_map.implementation = NumberMap.SingleGPU( + df, src_col_names, dst_col_names, renumber_map.id_type, + store_transposed + ) + elif type(df) is dask_cudf.DataFrame: + renumber_map.implementation = NumberMap.MultiGPU( + df, src_col_names, dst_col_names, renumber_map.id_type, + store_transposed + ) + else: + raise Exception("df must be cudf.DataFrame or dask_cudf.DataFrame") + + num_edges = len(df) + + if isinstance(df, dask_cudf.DataFrame): + is_mnmg = True + else: + is_mnmg = False + + if is_mnmg: + client = default_client() + data = get_distributed_data(df) + result = [(client.submit(call_renumber, + Comms.get_session_id(), + wf[1], + num_edges, + is_mnmg, + store_transposed, + workers=[wf[0]]), wf[0]) + for idx, wf in enumerate(data.worker_to_parts.items())] + wait(result) + + def get_renumber_map(data): + return data[0] + + def get_renumbered_df(data): + return data[1] + + renumbering_map = dask_cudf.from_delayed( + [client.submit(get_renumber_map, + data, + workers=[wf]) + for (data, wf) in result]) + renumbered_df = dask_cudf.from_delayed( + [client.submit(get_renumbered_df, + data, + workers=[wf]) + for (data, wf) in result]) + + renumber_map.implementation.ddf = renumbering_map + renumber_map.implementation.numbered = True + + return renumbered_df, renumber_map + else: + renumbering_map, renumbered_df = c_renumber.renumber( + df, + num_edges, + 0, + Comms.get_default_handle(), + is_mnmg, + store_transposed) + renumber_map.implementation.df = renumbering_map + renumber_map.implementation.numbered = True + return renumbered_df, renumber_map + + def unrenumber(self, df, column_name, preserve_order=False): + """ + Given a DataFrame containing internal vertex ids in the identified + column, replace this with external vertex ids. If the renumbering + is from a single column, the output dataframe will use the same + name for the external vertex identifiers. If the renumbering is from + a multi-column input, the output columns will be labeled 0 through + n-1 with a suffix of _column_name. + Note that this function does not guarantee order or partitioning in + multi-GPU mode. + Parameters + ---------- + df: cudf.DataFrame or dask_cudf.DataFrame + A DataFrame containing internal vertex identifiers that will be + converted into external vertex identifiers. + column_name: string + Name of the column containing the internal vertex id. + preserve_order: (optional) bool + If True, preserve the ourder of the rows in the output + DataFrame to match the input DataFrame + Returns + --------- + df : cudf.DataFrame or dask_cudf.DataFrame + The original DataFrame columns exist unmodified. The external + vertex identifiers are added to the DataFrame, the internal + vertex identifier column is removed from the dataframe. 
+ Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> + >>> df, number_map = NumberMap.renumber(df, '0', '1') + >>> + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(df, 'src', 'dst') + >>> + >>> pr = cugraph.pagerank(G, alpha = 0.85, max_iter = 500, + >>> tol = 1.0e-05) + >>> + >>> pr = number_map.unrenumber(pr, 'vertex') + >>> + """ + if len(self.col_names) == 1: + # Output will be renamed to match input + mapping = {"0": column_name} + else: + # Output will be renamed to ${i}_${column_name} + mapping = {} + for nm in self.col_names: + mapping[nm] = nm + "_" + column_name + + if preserve_order: + index_name = NumberMap.generate_unused_column_name(df) + df[index_name] = df.index + + df = self.from_internal_vertex_id(df, column_name, drop=True) + + if preserve_order: + df = df.sort_values( + index_name + ).drop(columns=index_name).reset_index(drop=True) + + if type(df) is dask_cudf.DataFrame: + return df.map_partitions( + lambda df: df.rename(columns=mapping, copy=False) + ) + else: + return df.rename(columns=mapping, copy=False) diff --git a/python/cugraph/dask/structure/renumber_wrapper.pyx b/python/cugraph/structure/renumber_wrapper.pyx similarity index 73% rename from python/cugraph/dask/structure/renumber_wrapper.pyx rename to python/cugraph/structure/renumber_wrapper.pyx index 1ab290cfb10..302fcfe583b 100644 --- a/python/cugraph/dask/structure/renumber_wrapper.pyx +++ b/python/cugraph/structure/renumber_wrapper.pyx @@ -25,41 +25,45 @@ import numpy as np from libcpp.utility cimport move from rmm._lib.device_buffer cimport device_buffer, DeviceBuffer -cdef renumber_helper(shuffled_vertices_t* ptr_maj_min_w): +cdef renumber_helper(shuffled_vertices_t* ptr_maj_min_w, vertex_t, weights): # extract shuffled result: # cdef pair[unique_ptr[device_buffer], size_t] pair_s_major = deref(ptr_maj_min_w).get_major_wrap() cdef pair[unique_ptr[device_buffer], size_t] pair_s_minor = deref(ptr_maj_min_w).get_minor_wrap() cdef pair[unique_ptr[device_buffer], size_t] pair_s_weights = deref(ptr_maj_min_w).get_weights_wrap() - shufled_major_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_major.first)) - shufled_major_buffer = Buffer(shufled_major_buffer) + shuffled_major_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_major.first)) + shuffled_major_buffer = Buffer(shuffled_major_buffer) - shufled_major_series = cudf.Series(data=shufled_major_buffer, dtype=vertex_t) + shuffled_major_series = cudf.Series(data=shuffled_major_buffer, dtype=vertex_t) - shufled_minor_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_minor.first)) - shufled_minor_buffer = Buffer(shufled_minor_buffer) - - shufled_minor_series = cudf.Series(data=shufled_minor_buffer, dtype=vertex_t) - - shufled_weights_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_weights.first)) - shufled_weights_buffer = Buffer(shufled_weights_buffer) - - shufled_weights_series = cudf.Series(data=shufled_weights_buffer, dtype=weight_t) + shuffled_minor_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_minor.first)) + shuffled_minor_buffer = Buffer(shuffled_minor_buffer) + shuffled_minor_series = cudf.Series(data=shuffled_minor_buffer, dtype=vertex_t) + shuffled_df = cudf.DataFrame() shuffled_df['src']=shuffled_major_series shuffled_df['dst']=shuffled_minor_series - shuffled_df['weights']= shuffled_weights_series + + if weights is not None: + weight_t = weights.dtype + shuffled_weights_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_weights.first)) 
+ shuffled_weights_buffer = Buffer(shuffled_weights_buffer) + + shuffled_weights_series = cudf.Series(data=shuffled_weights_buffer, dtype=weight_t) + + shuffled_df['weights']= shuffled_weights_series return shuffled_df -def mg_renumber(input_df, # maybe use cpdef ? - num_global_verts, - num_global_edges, - rank, - handle, - is_multi_gpu): + +def renumber(input_df, # maybe use cpdef ? + num_global_edges, + rank, + handle, + is_multi_gpu, + transposed): """ Call MNMG renumber """ @@ -67,11 +71,16 @@ def mg_renumber(input_df, # maybe use cpdef ? # TODO: get handle_t out of handle... handle_ptr = handle_size_t - src = input_df['src'] - dst = input_df['dst'] + if not transposed: + major_vertices = input_df['src'] + minor_vertices = input_df['dst'] + else: + major_vertices = input_df['dst'] + minor_vertices = input_df['src'] + cdef uintptr_t c_edge_weights = NULL # set below... - vertex_t = src.dtype + vertex_t = major_vertices.dtype if num_global_edges > (2**31 - 1): edge_t = np.dtype("int64") else: @@ -81,6 +90,7 @@ def mg_renumber(input_df, # maybe use cpdef ? weight_t = weights.dtype c_edge_weights = weights.__cuda_array_interface__['data'][0] else: + weights = None weight_t = np.dtype("float32") if (vertex_t != np.dtype("int32") and vertex_t != np.dtype("int64")): @@ -93,10 +103,10 @@ def mg_renumber(input_df, # maybe use cpdef ? raise Exception("Incompatible vertex_t and edge_t types.") # FIXME: needs to be edge_t type not int - cdef int num_partition_edges = len(src) + cdef int num_partition_edges = len(major_vertices) - cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] - cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + cdef uintptr_t c_major_vertices = major_vertices.__cuda_array_interface__['data'][0] + cdef uintptr_t c_minor_vertices = minor_vertices.__cuda_array_interface__['data'][0] cdef bool is_hyper_partitioned = False # for now @@ -132,27 +142,29 @@ def mg_renumber(input_df, # maybe use cpdef ? if (vertex_t == np.dtype("int32")): if ( edge_t == np.dtype("int32")): if( weight_t == np.dtype("float32")): - ptr_shuffled_32_32.reset(call_shuffle[int, int, float](deref(handle_ptr), - c_src_vertices, - c_dst_vertices, - c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) - - shuffled_df = renumber_helper(ptr_shuffled_32_32.get()) - - shuffled_src = shufled_df['src'] - shuffled_dst = shufled_df['dst'] - + if(is_multi_gpu): + ptr_shuffled_32_32.reset(call_shuffle[int, int, float](deref(handle_ptr), + c_major_vertices, + c_minor_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + shuffled_df = renumber_helper(ptr_shuffled_32_32.get(), vertex_t, weights) + else: + shuffled_df = input_df + + shuffled_src = shuffled_df['src'] + shuffled_dst = shuffled_df['dst'] + num_partition_edges = len(shuffled_df) + shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] - ptr_renum_quad_32_32.reset(call_renumber[int, int](deref(handle_ptr), shuffled_major, shuffled_minor, num_partition_edges, is_hyper_partitioned, - do_check, + 1, mg_flag).release()) pair_original = ptr_renum_quad_32_32.get().get_dv_wrap() # original vertices: see helper @@ -174,8 +186,9 @@ def mg_renumber(input_df, # maybe use cpdef ? 
uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) - + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), + uniq_partition_vector_32.get()[0].at(1)), + dtype=vertex_t) # create new cudf df # # and add the previous series to it: @@ -185,18 +198,23 @@ def mg_renumber(input_df, # maybe use cpdef ? renumbered_map['new_ids'] = new_series return renumbered_map, shuffled_df + elif( weight_t == np.dtype("float64")): - ptr_shuffled_32_64.reset(call_shuffle[int, int, double](deref(handle_ptr), - c_src_vertices, - c_dst_vertices, - c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) - - shuffled_df = renumber_helper(ptr_shuffled_32_64.get()) - - shuffled_src = shufled_df['src'] - shuffled_dst = shufled_df['dst'] + if(is_multi_gpu): + ptr_shuffled_32_64.reset(call_shuffle[int, int, double](deref(handle_ptr), + c_major_vertices, + c_minor_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_32_64.get(), vertex_t, weights) + else: + shuffled_df = input_df + + shuffled_src = shuffled_df['src'] + shuffled_dst = shuffled_df['dst'] + num_partition_edges = len(shuffled_df) shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] @@ -228,7 +246,9 @@ def mg_renumber(input_df, # maybe use cpdef ? uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), + uniq_partition_vector_32.get()[0].at(1)), + dtype=vertex_t) # create new cudf df # @@ -241,17 +261,21 @@ def mg_renumber(input_df, # maybe use cpdef ? return renumbered_map, shuffled_df elif ( edge_t == np.dtype("int64")): if( weight_t == np.dtype("float32")): - ptr_shuffled_32_32.reset(call_shuffle[int, long, float](deref(handle_ptr), - c_src_vertices, - c_dst_vertices, - c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) - - shuffled_df = renumber_helper(ptr_shuffled_32_32.get()) - - shuffled_src = shufled_df['src'] - shuffled_dst = shufled_df['dst'] + if(is_multi_gpu): + ptr_shuffled_32_32.reset(call_shuffle[int, long, float](deref(handle_ptr), + c_major_vertices, + c_minor_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_32_32.get(), vertex_t, weights) + else: + shuffled_df = input_df + + shuffled_src = shuffled_df['src'] + shuffled_dst = shuffled_df['dst'] + num_partition_edges = len(shuffled_df) shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] @@ -283,8 +307,10 @@ def mg_renumber(input_df, # maybe use cpdef ? uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) - + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), + uniq_partition_vector_32.get()[0].at(1)), + dtype=vertex_t) + # create new cudf df # # and add the previous series to it: @@ -295,17 +321,21 @@ def mg_renumber(input_df, # maybe use cpdef ? 
return renumbered_map, shuffled_df elif( weight_t == np.dtype("float64")): - ptr_shuffled_32_64.reset(call_shuffle[int, long, double](deref(handle_ptr), - c_src_vertices, - c_dst_vertices, - c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) - - shuffled_df = renumber_helper(ptr_shuffled_32_64.get()) + if(is_multi_gpu): + ptr_shuffled_32_64.reset(call_shuffle[int, long, double](deref(handle_ptr), + c_major_vertices, + c_minor_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_32_64.get(), vertex_t, weights) + else: + shuffled_df = input_df - shuffled_src = shufled_df['src'] - shuffled_dst = shufled_df['dst'] + shuffled_src = shuffled_df['src'] + shuffled_dst = shuffled_df['dst'] + num_partition_edges = len(shuffled_df) shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] @@ -337,8 +367,9 @@ def mg_renumber(input_df, # maybe use cpdef ? uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) - + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), + uniq_partition_vector_32.get()[0].at(1)), + dtype=vertex_t) # create new cudf df # # and add the previous series to it: @@ -351,17 +382,21 @@ def mg_renumber(input_df, # maybe use cpdef ? elif (vertex_t == np.dtype("int64")): if ( edge_t == np.dtype("int64")): if( weight_t == np.dtype("float32")): - ptr_shuffled_64_32.reset(call_shuffle[long, long, float](deref(handle_ptr), - c_src_vertices, - c_dst_vertices, - c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) - - shuffled_df = renumber_helper(ptr_shuffled_64_32.get()) + if(is_multi_gpu): + ptr_shuffled_64_32.reset(call_shuffle[long, long, float](deref(handle_ptr), + c_major_vertices, + c_minor_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_64_32.get(), vertex_t, weights) + else: + shuffled_df = input_df - shuffled_src = shufled_df['src'] - shuffled_dst = shufled_df['dst'] + shuffled_src = shuffled_df['src'] + shuffled_dst = shuffled_df['dst'] + num_partition_edges = len(shuffled_df) shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] @@ -393,7 +428,9 @@ def mg_renumber(input_df, # maybe use cpdef ? uniq_partition_vector_64.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), + uniq_partition_vector_32.get()[0].at(1)), + dtype=vertex_t) # create new cudf df # @@ -405,17 +442,21 @@ def mg_renumber(input_df, # maybe use cpdef ? 
return renumbered_map, shuffled_df elif( weight_t == np.dtype("float64")): - ptr_shuffled_64_64.reset(call_shuffle[long, long, double](deref(handle_ptr), - c_src_vertices, - c_dst_vertices, - c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) - - shuffled_df = renumber_helper(ptr_shuffled_64_64.get()) + if(is_multi_gpu): + ptr_shuffled_64_64.reset(call_shuffle[long, long, double](deref(handle_ptr), + c_major_vertices, + c_minor_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_64_64.get(), vertex_t, weights) + else: + shuffled_df = input_df - shuffled_src = shufled_df['src'] - shuffled_dst = shufled_df['dst'] + shuffled_src = shuffled_df['src'] + shuffled_dst = shuffled_df['dst'] + num_partition_edges = len(shuffled_df) shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] @@ -447,7 +488,9 @@ def mg_renumber(input_df, # maybe use cpdef ? uniq_partition_vector_64.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), + uniq_partition_vector_32.get()[0].at(1)), + dtype=vertex_t) # create new cudf df # From 591f3fb31ded25634efdfc14821eeb5b367a757f Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Mon, 15 Mar 2021 12:21:41 -0400 Subject: [PATCH 21/51] Fix bugs in copy_v_transform_reduce_key_aggregated_out_nbr & groupby_gpuid_and_shuffle (#1434) Related to addressing Issue https://github.com/rapidsai/cugraph/issues/1381. Fix bugs in MNMG graph primitives to support Louvain. Authors: - Seunghwa Kang (@seunghwak) Approvers: - Chuck Hastings (@ChuckHastings) - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1434 --- ...ransform_reduce_key_aggregated_out_nbr.cuh | 102 ++++-------- cpp/include/utilities/collect_comm.cuh | 153 ++++++++++++++++++ cpp/include/utilities/shuffle_comm.cuh | 28 ++++ cpp/include/utilities/thrust_tuple_utils.cuh | 3 +- 4 files changed, 215 insertions(+), 71 deletions(-) create mode 100644 cpp/include/utilities/collect_comm.cuh diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 8490df1d17d..11cf2cb1137 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -88,16 +88,17 @@ __global__ void for_all_major_for_all_nbr_low_degree( // in-place reduce_by_key vertex_t key_idx{0}; key_aggregated_edge_weights[local_offset + key_idx] = - weights != nullptr ? weights[0] : weight_t{1.0}; + weights != nullptr ? key_aggregated_edge_weights[local_offset] : weight_t{1.0}; + for (edge_t i = 1; i < local_degree; ++i) { if (minor_keys[local_offset + i] == minor_keys[local_offset + key_idx]) { key_aggregated_edge_weights[local_offset + key_idx] += - weights != nullptr ? weights[i] : weight_t{1.0}; + weights != nullptr ? key_aggregated_edge_weights[local_offset + i] : weight_t{1.0}; } else { ++key_idx; minor_keys[local_offset + key_idx] = minor_keys[local_offset + i]; key_aggregated_edge_weights[local_offset + key_idx] = - weights != nullptr ? weights[i] : weight_t{1.0}; + weights != nullptr ? 
key_aggregated_edge_weights[local_offset + i] : weight_t{1.0}; } } thrust::fill(thrust::seq, @@ -170,6 +171,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( template insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); - if (GraphViewType::is_multi_gpu) { - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - - rmm::device_uvector unique_keys( - graph_view.get_number_of_local_adj_matrix_partition_cols(), handle.get_stream()); - thrust::copy( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - adj_matrix_col_key_first, - adj_matrix_col_key_first + graph_view.get_number_of_local_adj_matrix_partition_cols(), - unique_keys.begin()); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - unique_keys.begin(), - unique_keys.end()); - auto last = thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - unique_keys.begin(), - unique_keys.end()); - unique_keys.resize(thrust::distance(unique_keys.begin(), last), handle.get_stream()); - - rmm::device_uvector rx_unique_keys(0, handle.get_stream()); - std::vector rx_value_counts{}; - std::tie(rx_unique_keys, rx_value_counts) = groupby_gpuid_and_shuffle_values( - comm, - unique_keys.begin(), - unique_keys.end(), - [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__(auto val) { - return key_func(val); - }, - handle.get_stream()); - - rmm::device_uvector values_for_unique_keys(rx_unique_keys.size(), handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // cuco::static_map currently does not take stream - - kv_map_ptr->find(rx_unique_keys.begin(), rx_unique_keys.end(), values_for_unique_keys.begin()); - - rmm::device_uvector rx_values_for_unique_keys(0, handle.get_stream()); - - std::tie(rx_values_for_unique_keys, std::ignore) = - shuffle_values(comm, values_for_unique_keys.begin(), rx_value_counts, handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // cuco::static_map currently does not take stream - - kv_map_ptr.reset(); - - kv_map_ptr = std::make_unique>( - static_cast(static_cast(unique_keys.size()) / load_factor), - invalid_vertex_id::value, - invalid_vertex_id::value); - - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(unique_keys.begin(), rx_values_for_unique_keys.begin())), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - - kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); - } - // 2. aggregate each vertex out-going edges based on keys and transform-reduce. 
auto loop_count = size_t{1}; @@ -382,9 +322,31 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( auto val) { return key_func(thrust::get<1>(val)); }, handle.get_stream()); - tmp_major_vertices = std::move(rx_major_vertices); - tmp_minor_keys = std::move(rx_minor_keys); - tmp_key_aggregated_edge_weights = std::move(rx_key_aggregated_edge_weights); + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(rx_major_vertices.begin(), rx_minor_keys.begin())); + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first, + pair_first + rx_major_vertices.size(), + rx_key_aggregated_edge_weights.begin()); + tmp_major_vertices.resize(rx_major_vertices.size(), handle.get_stream()); + tmp_minor_keys.resize(tmp_major_vertices.size(), handle.get_stream()); + tmp_key_aggregated_edge_weights.resize(tmp_major_vertices.size(), handle.get_stream()); + auto pair_it = + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pair_first, + pair_first + rx_major_vertices.size(), + rx_key_aggregated_edge_weights.begin(), + thrust::make_zip_iterator(thrust::make_tuple( + tmp_major_vertices.begin(), tmp_minor_keys.begin())), + tmp_key_aggregated_edge_weights.begin()); + tmp_major_vertices.resize( + thrust::distance(tmp_key_aggregated_edge_weights.begin(), thrust::get<1>(pair_it)), + handle.get_stream()); + tmp_minor_keys.resize(tmp_major_vertices.size(), handle.get_stream()); + tmp_key_aggregated_edge_weights.resize(tmp_major_vertices.size(), handle.get_stream()); + tmp_major_vertices.shrink_to_fit(handle.get_stream()); + tmp_minor_keys.shrink_to_fit(handle.get_stream()); + tmp_key_aggregated_edge_weights.shrink_to_fit(handle.get_stream()); } auto tmp_e_op_result_buffer = diff --git a/cpp/include/utilities/collect_comm.cuh b/cpp/include/utilities/collect_comm.cuh new file mode 100644 index 00000000000..5ca58ebeb17 --- /dev/null +++ b/cpp/include/utilities/collect_comm.cuh @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include + +namespace cugraph { +namespace experimental { + +// for key = [map_key_first, map_key_last), key_to_gpu_id_op(key) should be coincide with +// comm.get_rank() +template +decltype(allocate_dataframe_buffer::value_type>( + 0, cudaStream_t{nullptr})) +collect_values_for_keys(raft::comms::comms_t const &comm, + VertexIterator0 map_key_first, + VertexIterator0 map_key_last, + ValueIterator map_value_first, + VertexIterator1 collect_key_first, + VertexIterator1 collect_key_last, + KeyToGPUIdOp key_to_gpu_id_op, + cudaStream_t stream) +{ + using vertex_t = typename std::iterator_traits::value_type; + static_assert( + std::is_same::value_type, vertex_t>::value); + using value_t = typename std::iterator_traits::value_type; + + double constexpr load_factor = 0.7; + + // FIXME: we may compare the performance & memory footprint of this hash based approach vs binary + // search based approach + + // 1. build a cuco::static_map object for the map k, v pairs. + + auto kv_map_ptr = std::make_unique>( + static_cast(static_cast(thrust::distance(map_key_first, map_key_last)) / + load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value); + { + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); + } + + // 2. collect values for the unique keys in [collect_key_first, collect_key_last) + + rmm::device_uvector unique_keys(thrust::distance(collect_key_first, collect_key_last), + stream); + thrust::copy( + rmm::exec_policy(stream)->on(stream), collect_key_first, collect_key_last, unique_keys.begin()); + // FIXME: sort and unique are unnecessary if the keys in [collect_key_first, collect_key_last) are + // already unique, if this cost becomes a performance bottlenec, we may add + // collect_values_for_unique_keys in the future + thrust::sort(rmm::exec_policy(stream)->on(stream), unique_keys.begin(), unique_keys.end()); + unique_keys.resize( + thrust::distance( + unique_keys.begin(), + thrust::unique(rmm::exec_policy(stream)->on(stream), unique_keys.begin(), unique_keys.end())), + stream); + + rmm::device_uvector values_for_unique_keys(0, stream); + { + rmm::device_uvector rx_unique_keys(0, stream); + std::vector rx_value_counts{}; + std::tie(rx_unique_keys, rx_value_counts) = groupby_gpuid_and_shuffle_values( + comm, + unique_keys.begin(), + unique_keys.end(), + [key_to_gpu_id_op] __device__(auto val) { return key_to_gpu_id_op(val); }, + stream); + + rmm::device_uvector values_for_rx_unique_keys(rx_unique_keys.size(), stream); + + CUDA_TRY(cudaStreamSynchronize(stream)); // cuco::static_map currently does not take stream + + kv_map_ptr->find( + rx_unique_keys.begin(), rx_unique_keys.end(), values_for_rx_unique_keys.begin()); + + rmm::device_uvector rx_values_for_unique_keys(0, stream); + std::tie(rx_values_for_unique_keys, std::ignore) = + shuffle_values(comm, values_for_rx_unique_keys.begin(), rx_value_counts, stream); + + values_for_unique_keys = std::move(rx_values_for_unique_keys); + } + + // 3. re-build a cuco::static_map object for the k, v pairs in unique_keys, + // values_for_unique_keys. 
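+  // After step 2, values_for_unique_keys[i] holds the value for unique_keys[i], gathered from the
+  // rank whose local map owns that key; the first map is therefore discarded and a smaller one over
+  // just these pairs is enough to serve the final lookups in step 4.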
+ + CUDA_TRY(cudaStreamSynchronize(stream)); // cuco::static_map currently does not take stream + + kv_map_ptr.reset(); + + kv_map_ptr = std::make_unique>( + static_cast(static_cast(unique_keys.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value); + { + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(unique_keys.begin(), values_for_unique_keys.begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + + kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); + } + + // 4. find values for [collect_key_first, collect_key_last) + + auto value_buffer = allocate_dataframe_buffer( + thrust::distance(collect_key_first, collect_key_last), stream); + kv_map_ptr->find( + collect_key_first, collect_key_last, get_dataframe_buffer_begin(value_buffer)); + + return value_buffer; +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/utilities/shuffle_comm.cuh b/cpp/include/utilities/shuffle_comm.cuh index da86f76b11d..8c363c9a346 100644 --- a/cpp/include/utilities/shuffle_comm.cuh +++ b/cpp/include/utilities/shuffle_comm.cuh @@ -228,6 +228,15 @@ auto shuffle_values(raft::comms::comms_t const &comm, rx_src_ranks, stream); + if (rx_counts.size() < static_cast(comm_size)) { + std::vector tmp_rx_counts(comm_size, size_t{0}); + for (size_t i = 0; i < rx_src_ranks.size(); ++i) { + assert(rx_src_ranks[i] < comm_size); + tmp_rx_counts[rx_src_ranks[i]] = rx_counts[i]; + } + rx_counts = std::move(tmp_rx_counts); + } + return std::make_tuple(std::move(rx_value_buffer), rx_counts); } @@ -271,6 +280,14 @@ auto groupby_gpuid_and_shuffle_values(raft::comms::comms_t const &comm, rx_src_ranks, stream); + if (rx_counts.size() < static_cast(comm_size)) { + std::vector tmp_rx_counts(comm_size, size_t{0}); + for (size_t i = 0; i < rx_src_ranks.size(); ++i) { + tmp_rx_counts[rx_src_ranks[i]] = rx_counts[i]; + } + rx_counts = std::move(tmp_rx_counts); + } + return std::make_tuple(std::move(rx_value_buffer), rx_counts); } @@ -282,6 +299,8 @@ auto groupby_gpuid_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, KeyToGPUIdOp key_to_gpu_id_op, cudaStream_t stream) { + auto const comm_size = comm.get_size(); + auto d_tx_value_counts = detail::sort_and_count( comm, tx_key_first, tx_key_last, tx_value_first, key_to_gpu_id_op, stream); @@ -328,6 +347,15 @@ auto groupby_gpuid_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, rx_src_ranks, stream); + if (rx_counts.size() < static_cast(comm_size)) { + std::vector tmp_rx_counts(comm_size, size_t{0}); + for (size_t i = 0; i < rx_src_ranks.size(); ++i) { + assert(rx_src_ranks[i] < comm_size); + tmp_rx_counts[rx_src_ranks[i]] = rx_counts[i]; + } + rx_counts = std::move(tmp_rx_counts); + } + return std::make_tuple(std::move(rx_keys), std::move(rx_value_buffer), rx_counts); } diff --git a/cpp/include/utilities/thrust_tuple_utils.cuh b/cpp/include/utilities/thrust_tuple_utils.cuh index 0ad71ba5e05..01843a583eb 100644 --- a/cpp/include/utilities/thrust_tuple_utils.cuh +++ b/cpp/include/utilities/thrust_tuple_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,6 +15,7 @@ */ #pragma once +#include #include #include From fe0cfc7815017841ce281595e0d94f608029dc03 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Tue, 16 Mar 2021 16:53:23 -0500 Subject: [PATCH 22/51] Update to make notebook_list.py compatible with numba 0.53 (#1455) A recent update to numba 0.53 in CI broke this script and caused CI failures. This makes the script compatible with both pre and post numba 0.53 versions. Tested in a local env with numba 0.53 installed. Authors: - Rick Ratzel (@rlratzel) Approvers: - Alex Fender (@afender) - Brad Rees (@BradReesWork) - Chuck Hastings (@ChuckHastings) - AJ Schmidt (@ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1455 --- ci/gpu/notebook_list.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/gpu/notebook_list.py b/ci/gpu/notebook_list.py index bb54913ac8d..8748c434006 100644 --- a/ci/gpu/notebook_list.py +++ b/ci/gpu/notebook_list.py @@ -24,7 +24,9 @@ pascal = False device = cuda.get_current_device() -cc = getattr(device, 'COMPUTE_CAPABILITY') +# check for the attribute using both pre and post numba 0.53 names +cc = getattr(device, 'COMPUTE_CAPABILITY', None) or \ + getattr(device, 'compute_capability') if (cc[0] < 7): pascal = True From a7c4ebd906dbfa09e6509ab9aac502cce3c7695a Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 17 Mar 2021 23:44:08 +1100 Subject: [PATCH 23/51] Remove literals passed to `device_uvector::set_element_async` (#1453) After rapidsai/rmm#725 is merged, this PR updates cuspatial to eliminate passing literal values to device_uvector::set_element_async. Companion PR to rapidsai/cuspatial#367 Authors: - Mark Harris (@harrism) Approvers: - Seunghwa Kang (@seunghwak) - Alex Fender (@afender) - Andrei Schaffer (@aschaffer) URL: https://github.com/rapidsai/cugraph/pull/1453 --- cpp/src/experimental/graph.cu | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 498bb4eaefe..5abe141dafd 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -304,9 +304,15 @@ graph_t segment_offsets(detail::num_segments_per_vertex_partition + 1, default_stream); - segment_offsets.set_element_async(0, 0, default_stream); + + // temporaries are necessary because the &&-overload of device_uvector is deleted + // Note that we must sync `default_stream` before these temporaries go out of scope to + // avoid use after free. (The syncs are at the end of this function) + auto zero_vertex = vertex_t{0}; + auto vertex_count = static_cast(degrees.size()); + segment_offsets.set_element_async(0, zero_vertex, default_stream); segment_offsets.set_element_async( - detail::num_segments_per_vertex_partition, degrees.size(), default_stream); + detail::num_segments_per_vertex_partition, vertex_count, default_stream); thrust::upper_bound(rmm::exec_policy(default_stream)->on(default_stream), degrees.begin(), @@ -454,9 +460,16 @@ graph_t segment_offsets(detail::num_segments_per_vertex_partition + 1, default_stream); - segment_offsets.set_element_async(0, 0, default_stream); + + // temporaries are necessary because the &&-overload of device_uvector is deleted + // Note that we must sync `default_stream` before these temporaries go out of scope to + // avoid use after free. 
(The syncs are at the end of this function) + auto zero_vertex = vertex_t{0}; + auto vertex_count = static_cast(this->get_number_of_vertices()); + segment_offsets.set_element_async(0, zero_vertex, default_stream); + segment_offsets.set_element_async( - detail::num_segments_per_vertex_partition, this->get_number_of_vertices(), default_stream); + detail::num_segments_per_vertex_partition, vertex_count, default_stream); thrust::upper_bound(rmm::exec_policy(default_stream)->on(default_stream), degree_first, From c5ce11942835fbbf4c55a263b2f73cf5f3274c39 Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Wed, 17 Mar 2021 12:17:46 -0400 Subject: [PATCH 24/51] Updating docs (#1448) * added Hungarian to api.rst * fixed missing examples * fixed syntax Authors: - Brad Rees (@BradReesWork) Approvers: - Rick Ratzel (@rlratzel) - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1448 --- README.md | 1 - docs/source/api.rst | 62 ++++++++++++++++ docs/source/dask-cugraph.rst | 71 +++++++------------ python/cugraph/bsp/traversal/bfs_bsp.py | 10 ++- python/cugraph/community/egonet.py | 11 +++ python/cugraph/components/connectivity.py | 4 +- .../dask/centrality/katz_centrality.py | 6 +- python/cugraph/dask/community/louvain.py | 5 +- python/cugraph/dask/link_analysis/pagerank.py | 6 +- python/cugraph/dask/traversal/bfs.py | 6 +- python/cugraph/dask/traversal/sssp.py | 6 +- python/cugraph/tree/minimum_spanning_tree.py | 24 +++++-- 12 files changed, 142 insertions(+), 70 deletions(-) diff --git a/README.md b/README.md index 62059e9c7b6..77377fe2bbc 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,6 @@ As of Release 0.18 - including 0.18 nightly | | Renumbering | Single-GPU | multiple columns, any data type | | | Symmetrize | Multi-GPU | | | Other | | | | -| | Hungarian Algorithm | Single-GPU | | | | Minimum Spanning Tree | Single-GPU | | | | Maximum Spanning Tree | Single-GPU | | | | | diff --git a/docs/source/api.rst b/docs/source/api.rst index dcdf3e6ff33..b02f8f488c5 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -48,6 +48,13 @@ Katz Centrality :undoc-members: +Katz Centrality (MG) +-------------------- + +.. automodule:: cugraph.dask.centrality.katz_centrality + :members: + :undoc-members: + Community ========= @@ -86,6 +93,14 @@ Louvain :members: :undoc-members: +Louvain (MG) +------------ + +.. automodule:: cugraph.dask.community.louvain + :members: + :undoc-members: + + Spectral Clustering ------------------- @@ -148,6 +163,17 @@ Force Atlas 2 :undoc-members: +Linear Assignment +================= + +Hungarian +------------- + +.. automodule:: cugraph.linear_assignment.hungarian + :members: + :undoc-members: + + Link Analysis ============= @@ -165,6 +191,13 @@ Pagerank :members: :undoc-members: +Pagerank (MG) +--------- + +.. automodule:: cugraph.dask.link_analysis.pagerank + :members: pagerank + :undoc-members: + Link Prediction =============== @@ -202,6 +235,13 @@ Breadth-first-search :members: :undoc-members: +Breadth-first-search (MG) +-------------------- + +.. automodule:: cugraph.dask.traversal.bfs + :members: + :undoc-members: + Single-source-shortest-path --------------------------- @@ -209,6 +249,13 @@ Single-source-shortest-path :members: :undoc-members: +Single-source-shortest-path (MG) +--------------------------- + +.. 
automodule:: cugraph.dask.traversal.sssp + :members: + :undoc-members: + Tree ========= @@ -227,3 +274,18 @@ Maximum Spanning Tree :members: :undoc-members: + +DASK MG Helper functions +=========================== + +.. automodule:: cugraph.comms.comms + :members: initialize + :undoc-members: + +.. automodule:: cugraph.comms.comms + :members: destroy + :undoc-members: + +.. automodule:: cugraph.dask.common.read_utils + :members: get_chunksize + :undoc-members: diff --git a/docs/source/dask-cugraph.rst b/docs/source/dask-cugraph.rst index b27ad382809..51487bfbf05 100644 --- a/docs/source/dask-cugraph.rst +++ b/docs/source/dask-cugraph.rst @@ -13,58 +13,41 @@ With cuGraph and Dask, whether you’re using a single NVIDIA GPU or multiple no If your graph comfortably fits in memory on a single GPU, you would want to use the single-GPU version of cuGraph. If you want to distribute your workflow across multiple GPUs and have more data than you can fit in memory on a single GPU, you would want to use cuGraph's multi-GPU features. +Example +======== -Distributed Graph Algorithms ----------------------------- +.. code-block:: python -.. automodule:: cugraph.dask.link_analysis.pagerank - :members: pagerank - :undoc-members: + from dask.distributed import Client, wait + from dask_cuda import LocalCUDACluster + import cugraph.comms as Comms + import cugraph.dask as dask_cugraph -.. automodule:: cugraph.dask.traversal.bfs - :members: bfs - :undoc-members: + cluster = LocalCUDACluster() + client = Client(cluster) + Comms.initialize(p2p=True) + # Helper function to set the reader chunk size to automatically get one partition per GPU + chunksize = dask_cugraph.get_chunksize(input_data_path) -Helper functions ----------------- + # Multi-GPU CSV reader + e_list = dask_cudf.read_csv(input_data_path, + chunksize = chunksize, + delimiter=' ', + names=['src', 'dst'], + dtype=['int32', 'int32']) -.. automodule:: cugraph.comms.comms - :members: initialize - :undoc-members: + G = cugraph.DiGraph() + G.from_dask_cudf_edgelist(e_list, source='src', destination='dst') -.. automodule:: cugraph.comms.comms - :members: destroy - :undoc-members: + # now run PageRank + pr_df = dask_cugraph.pagerank(G, tol=1e-4) -.. automodule:: cugraph.dask.common.read_utils - :members: get_chunksize - :undoc-members: + # All done, clean up + Comms.destroy() + client.close() + cluster.close() -Consolidation -============= -cuGraph can transparently interpret the Dask cuDF Dataframe as a regular Dataframe when loading the edge list. This is particularly helpful for workflows extracting a single GPU sized edge list from a distributed dataset. From there any existing single GPU feature will just work on this input. +| -For instance, consolidation allows leveraging Dask cuDF CSV reader to load file(s) on multiple GPUs and consolidate this input to a single GPU graph. Reading is often the time and memory bottleneck, with this feature users can call the Multi-GPU version of the reader without changing anything else. - -Batch Processing -================ - -cuGraph can leverage multi GPUs to increase processing speed for graphs that fit on a single GPU, providing faster analytics on such graphs. -You will be able to use the Graph the same way as you used to in a Single GPU environment, but analytics that support batch processing will automatically use the GPUs available to the dask client. -For example, Betweenness Centrality scores can be slow to obtain depending on the number of vertices used in the approximation. 
Thank to Multi GPUs Batch Processing, -you can create Single GPU graph as you would regularly do it using cuDF CSV reader, enable Batch analytics on it, and obtain scores much faster as each GPU will handle a sub-set of the sources. -In order to use Batch Analytics you need to set up a Dask Cluster and Client in addition to the cuGraph communicator, then you can simply call `enable_batch()` on you graph, and algorithms supporting batch processing will use multiple GPUs. - -Algorithms supporting Batch Processing --------------------------------------- -.. automodule:: cugraph.centrality - :members: betweenness_centrality - :undoc-members: - :noindex: - -.. automodule:: cugraph.centrality - :members: edge_betweenness_centrality - :undoc-members: - :noindex: diff --git a/python/cugraph/bsp/traversal/bfs_bsp.py b/python/cugraph/bsp/traversal/bfs_bsp.py index 28a71631443..9a2fd48e201 100644 --- a/python/cugraph/bsp/traversal/bfs_bsp.py +++ b/python/cugraph/bsp/traversal/bfs_bsp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,7 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. - +import warnings import cudf from collections import OrderedDict @@ -59,6 +59,12 @@ def bfs_df_pregel(_df, start, src_col='src', dst_col='dst', copy_data=True): """ + warnings.warn( + "This feature is deprecated and will be" + "dropped from cuGraph in release 0.20.", + FutureWarning, + ) + # extract the src and dst into a dataframe that can be modified if copy_data: coo_data = _df[[src_col, dst_col]] diff --git a/python/cugraph/community/egonet.py b/python/cugraph/community/egonet.py index 9ff12158b13..ca3c6149ece 100644 --- a/python/cugraph/community/egonet.py +++ b/python/cugraph/community/egonet.py @@ -74,6 +74,17 @@ def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None): G_ego : cuGraph.Graph or networkx.Graph A graph descriptor with a minimum spanning tree or forest. The networkx graph will not have all attributes copied over + + Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', + delimiter = ' ', + dtype=['int32', 'int32', 'float32'], + header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(M, source='0', destination='1') + >>> ego_graph = cugraph.ego_graph(G, seed, radius=2) + """ (G, input_type) = ensure_cugraph_obj(G, nx_weight_attr="weight") diff --git a/python/cugraph/components/connectivity.py b/python/cugraph/components/connectivity.py index 7c68afd7ced..72f33ebfcbb 100644 --- a/python/cugraph/components/connectivity.py +++ b/python/cugraph/components/connectivity.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -378,7 +378,7 @@ def connected_components(G, header=None) >>> G = cugraph.Graph() >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr=None) - >>> df = cugraph.strongly_connected_components(G) + >>> df = cugraph.connected_components(G, connection="weak") """ if connection == "weak": return weakly_connected_components(G, directed, diff --git a/python/cugraph/dask/centrality/katz_centrality.py b/python/cugraph/dask/centrality/katz_centrality.py index cf6ad95f974..e690e291928 100644 --- a/python/cugraph/dask/centrality/katz_centrality.py +++ b/python/cugraph/dask/centrality/katz_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -115,7 +115,8 @@ def katz_centrality(input_graph, Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize(p2p=True) + >>> ... Init a DASK Cluster + >> see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, delimiter=' ', @@ -125,7 +126,6 @@ def katz_centrality(input_graph, >>> dg.from_dask_cudf_edgelist(ddf, source='src', destination='dst', edge_attr='value') >>> pr = dcg.katz_centrality(dg) - >>> Comms.destroy() """ nstart = None diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py index 11ecb78375f..495061c0f81 100644 --- a/python/cugraph/dask/community/louvain.py +++ b/python/cugraph/dask/community/louvain.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -55,7 +55,8 @@ def louvain(input_graph, max_iter=100, resolution=1.0): Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize(p2p=True) + >>> ... Init a DASK Cluster + >> see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv('datasets/karate.csv', chunksize=chunksize, delimiter=' ', diff --git a/python/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/dask/link_analysis/pagerank.py index 1e9d79e0aa6..d8a76f1231e 100644 --- a/python/cugraph/dask/link_analysis/pagerank.py +++ b/python/cugraph/dask/link_analysis/pagerank.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -105,7 +105,8 @@ def pagerank(input_graph, Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize(p2p=True) + >>> ... 
Init a DASK Cluster + >> see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, delimiter=' ', @@ -115,7 +116,6 @@ def pagerank(input_graph, >>> dg.from_dask_cudf_edgelist(ddf, source='src', destination='dst', edge_attr='value') >>> pr = dcg.pagerank(dg) - >>> Comms.destroy() """ from cugraph.structure.graph import null_check diff --git a/python/cugraph/dask/traversal/bfs.py b/python/cugraph/dask/traversal/bfs.py index 7a2c50a3bc0..51e0dc0de5d 100644 --- a/python/cugraph/dask/traversal/bfs.py +++ b/python/cugraph/dask/traversal/bfs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -76,7 +76,8 @@ def bfs(graph, Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize(p2p=True) + >>> ... Init a DASK Cluster + >> see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, delimiter=' ', @@ -85,7 +86,6 @@ def bfs(graph, >>> dg = cugraph.DiGraph() >>> dg.from_dask_cudf_edgelist(ddf, 'src', 'dst') >>> df = dcg.bfs(dg, 0) - >>> Comms.destroy() """ client = default_client() diff --git a/python/cugraph/dask/traversal/sssp.py b/python/cugraph/dask/traversal/sssp.py index ce0c7908664..52f2b9b256c 100644 --- a/python/cugraph/dask/traversal/sssp.py +++ b/python/cugraph/dask/traversal/sssp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -76,7 +76,8 @@ def sssp(graph, Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize(p2p=True) + >>> ... Init a DASK Cluster + >> see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, delimiter=' ', @@ -85,7 +86,6 @@ def sssp(graph, >>> dg = cugraph.DiGraph() >>> dg.from_dask_cudf_edgelist(ddf, 'src', 'dst') >>> df = dcg.sssp(dg, 0) - >>> Comms.destroy() """ client = default_client() diff --git a/python/cugraph/tree/minimum_spanning_tree.py b/python/cugraph/tree/minimum_spanning_tree.py index 25a365665df..45e996aa083 100644 --- a/python/cugraph/tree/minimum_spanning_tree.py +++ b/python/cugraph/tree/minimum_spanning_tree.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -17,7 +17,7 @@ from cugraph.utilities import cugraph_to_nx -def minimum_spanning_tree_subgraph(G): +def _minimum_spanning_tree_subgraph(G): mst_subgraph = Graph() if type(G) is not Graph: raise Exception("input graph must be undirected") @@ -32,7 +32,7 @@ def minimum_spanning_tree_subgraph(G): return mst_subgraph -def maximum_spanning_tree_subgraph(G): +def _maximum_spanning_tree_subgraph(G): mst_subgraph = Graph() if type(G) is not Graph: raise Exception("input graph must be undirected") @@ -68,28 +68,33 @@ def minimum_spanning_tree( ---------- G : cuGraph.Graph or networkx.Graph cuGraph graph descriptor with connectivity information. + weight : string default to the weights in the graph, if the graph edges do not have a weight attribute a default weight of 1 will be used. + algorithm : string Default to 'boruvka'. The parallel algorithm to use when finding a minimum spanning tree. + ignore_nan : bool Default to False + Returns ------- G_mst : cuGraph.Graph or networkx.Graph A graph descriptor with a minimum spanning tree or forest. The networkx graph will not have all attributes copied over + """ G, isNx = check_nx_graph(G) if isNx is True: - mst = minimum_spanning_tree_subgraph(G) + mst = _minimum_spanning_tree_subgraph(G) return cugraph_to_nx(mst) else: - return minimum_spanning_tree_subgraph(G) + return _minimum_spanning_tree_subgraph(G) def maximum_spanning_tree( @@ -103,25 +108,30 @@ def maximum_spanning_tree( ---------- G : cuGraph.Graph or networkx.Graph cuGraph graph descriptor with connectivity information. + weight : string default to the weights in the graph, if the graph edges do not have a weight attribute a default weight of 1 will be used. + algorithm : string Default to 'boruvka'. The parallel algorithm to use when finding a maximum spanning tree. + ignore_nan : bool Default to False + Returns ------- G_mst : cuGraph.Graph or networkx.Graph A graph descriptor with a maximum spanning tree or forest. The networkx graph will not have all attributes copied over + """ G, isNx = check_nx_graph(G) if isNx is True: - mst = maximum_spanning_tree_subgraph(G) + mst = _maximum_spanning_tree_subgraph(G) return cugraph_to_nx(mst) else: - return maximum_spanning_tree_subgraph(G) + return _maximum_spanning_tree_subgraph(G) From 1c8da699a11682d0fb639614bac7fc10aecd66ba Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Thu, 18 Mar 2021 08:22:24 -0500 Subject: [PATCH 25/51] Removed unused dependencies from libcugraph recipe, moved non-test script code from test script to gpu build script (#1468) * Removed unused dependencies from the `libcugraph` recipe. This is motivated by the CuPy project to integrate `libcugraph` as the graph analytics backend with minimal extra dependencies ( https://github.com/cupy/cupy/issues/4219, https://github.com/cupy/cupy/issues/2431, https://github.com/cupy/cupy/pull/4054 ) * Moved non-test script code from test script to gpu build script. The `FIXME` addressed for this was added after discussing with @raydouglass earlier, and will allow any Project Flash failures to fail the build immediately instead of attempting to then run tests. * Removed unused cudf lib reference from test cmake file. Tested by doing a successful local `conda build` of the recipe. 
Authors: - Rick Ratzel (@rlratzel) Approvers: - Brad Rees (@BradReesWork) - Chuck Hastings (@ChuckHastings) - AJ Schmidt (@ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1468 --- ci/gpu/build.sh | 21 +++++++++++++++++++-- ci/test.sh | 24 ------------------------ conda/recipes/libcugraph/meta.yaml | 13 ++----------- cpp/tests/CMakeLists.txt | 1 - 4 files changed, 21 insertions(+), 38 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 0fef7b62f8d..7242b4a11f5 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -90,8 +90,25 @@ conda list --show-channel-urls ################################################################################ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then - gpuci_logger "Build from source" - $WORKSPACE/build.sh -v clean libcugraph cugraph + gpuci_logger "Build from source" + $WORKSPACE/build.sh -v clean libcugraph cugraph +else + export LIBCUGRAPH_BUILD_DIR="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build" + + # Faiss patch + echo "Update libcugraph.so" + cd $LIBCUGRAPH_BUILD_DIR + chrpath -d libcugraph.so + patchelf --replace-needed `patchelf --print-needed libcugraph.so | grep faiss` libfaiss.so libcugraph.so + + CONDA_FILE=`find $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ -name "libcugraph*.tar.bz2"` + CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension + CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install + echo "Installing $CONDA_FILE" + conda install -c $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ "$CONDA_FILE" + + echo "Build cugraph..." + $WORKSPACE/build.sh cugraph fi ################################################################################ diff --git a/ci/test.sh b/ci/test.sh index b0134e97246..58cbb950f73 100755 --- a/ci/test.sh +++ b/ci/test.sh @@ -61,30 +61,6 @@ else cd $WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build fi -# FIXME: if possible, any install and build steps should be moved outside this -# script since a failing install/build step is treated as a failing test command -# and will not stop the script. This script is also only expected to run tests -# in a preconfigured environment, and install/build steps are unexpected side -# effects. -if [[ "$PROJECT_FLASH" == "1" ]]; then - export LIBCUGRAPH_BUILD_DIR="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build" - - # Faiss patch - echo "Update libcugraph.so" - cd $LIBCUGRAPH_BUILD_DIR - chrpath -d libcugraph.so - patchelf --replace-needed `patchelf --print-needed libcugraph.so | grep faiss` libfaiss.so libcugraph.so - - CONDA_FILE=`find $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ -name "libcugraph*.tar.bz2"` - CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension - CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install - echo "Installing $CONDA_FILE" - conda install -c $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ "$CONDA_FILE" - - echo "Build cugraph..." - $WORKSPACE/build.sh cugraph -fi - # Do not abort the script on error from this point on. This allows all tests to # run regardless of pass/fail, but relies on the ERR trap above to manage the # EXITCODE for the script. 
diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index bb5e4b468a5..2602b2d8608 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -32,31 +32,22 @@ build: requirements: build: - cmake>=3.12.4 - - libcudf={{ minor_version }} - cudatoolkit {{ cuda_version }}.* + - librmm {{ minor_version }}.* - boost-cpp>=1.66 - - libcypher-parser - nccl>=2.8.4 - - ucx-py {{ minor_version }} - ucx-proc=*=gpu - gtest + - gmock - faiss-proc=*=cuda - conda-forge::libfaiss=1.7.0 - - gmock run: - - libcudf={{ minor_version }} - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} - nccl>=2.8.4 - - ucx-py {{ minor_version }} - ucx-proc=*=gpu - faiss-proc=*=cuda - conda-forge::libfaiss=1.7.0 -#test: -# commands: -# - test -f $PREFIX/include/cugraph.h - - about: home: http://rapids.ai/ license: Apache-2.0 diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 1db2f9df42e..5571cf5f124 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -91,7 +91,6 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC) cugraph GTest::GTest GTest::Main - ${CUDF_LIBRARY} ${NCCL_LIBRARIES} cudart cuda From ab4b77b4d6b32ecfbe965821f1eff737f1b06f07 Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Thu, 18 Mar 2021 15:47:11 -0500 Subject: [PATCH 26/51] Add additional datasets to improve coverage (#1441) add datasets to test self-loops, string vertex IDs and isolated vertices Update README closes #1214 Authors: - Joseph Nke (@jnke2016) Approvers: - Brad Rees (@BradReesWork) - Rick Ratzel (@rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1441 --- datasets/README.md | 199 ++++++++++++------- datasets/dolphins_multi_edge.csv | 325 +++++++++++++++++++++++++++++++ datasets/dolphins_s_loop.csv | 321 ++++++++++++++++++++++++++++++ datasets/get_test_data.sh | 10 +- datasets/karate_mod.mtx | 81 ++++++++ datasets/karate_multi_edge.csv | 160 +++++++++++++++ datasets/karate_s_loop.csv | 160 +++++++++++++++ datasets/karate_str.mtx | 78 ++++++++ python/cugraph/tests/utils.py | 15 ++ 9 files changed, 1281 insertions(+), 68 deletions(-) create mode 100644 datasets/dolphins_multi_edge.csv create mode 100644 datasets/dolphins_s_loop.csv create mode 100644 datasets/karate_mod.mtx create mode 100644 datasets/karate_multi_edge.csv create mode 100644 datasets/karate_s_loop.csv create mode 100644 datasets/karate_str.mtx diff --git a/datasets/README.md b/datasets/README.md index c7f76a91dfe..e42413fc996 100644 --- a/datasets/README.md +++ b/datasets/README.md @@ -1,67 +1,132 @@ -# Cugraph test and benchmark data - -## Python - -This directory contains small public datasets in `mtx` and `csv` format used by cuGraph's python tests. Graph details: - -| Graph | V | E | Directed | Weighted | -| ------------- | ----- | ----- | -------- | -------- | -| karate | 34 | 156 | No | No | -| dolphin | 62 | 318 | No | No | -| netscience | 1,589 | 5,484 | No | Yes | - -**karate** : The graph "karate" contains the network of friendships between the 34 members of a karate club at a US university, as described by Wayne Zachary in 1977. - -**dolphin** : The graph dolphins contains an undirected social network of frequent associations between 62 dolphins in a community living off Doubtful Sound, New Zealand, as compiled by Lusseau et al. (2003). - -**netscience** : The graph netscience contains a coauthorship network of scientists working on network theory and experiment, as compiled by M. Newman in May 2006. 
- -## C++ -Cugraph's C++ analytics tests need larger datasets (>5GB uncompressed) and reference results (>125MB uncompressed). They can be downloaded by running the provided script from the `datasets` directory. -``` -cd /datasets -./get_test_data.sh -``` -You may run this script from elsewhere and store C++ test input to another location. - -Before running the tests, you should let cuGraph know where to find the test input by using: -``` -export RAPIDS_DATASET_ROOT_DIR= -``` - -## Benchmarks -Cugraph benchmarks (which can be found [here](../benchmarks)) also use datasets installed to this folder. Because the datasets used for benchmarking are also quite large (~14GB uncompressed), they are not installed by default. To install datasets for benchmarks, run the same script shown above from the `datasets` directory using the `--benchmark` option: -``` -cd /datasets -./get_test_data.sh --benchmark -``` -The datasets installed for benchmarks currently include CSV files for use in creating both directed and undirected graphs: -``` -/datasets/csv - |- directed - |--- cit-Patents.csv (250M) - |--- soc-LiveJournal1.csv (965M) - |- undirected - |--- europe_osm.csv (1.8G) - |--- hollywood.csv (1.5G) - |--- soc-twitter-2010.csv (8.8G) -``` -The benchmark datasets are described below: -| Graph | V | E | Directed | Weighted | -| ----------------- | ---------- | ------------- | -------- | -------- | -| cit-Patents | 3,774,768 | 16,518,948 | Yes | No | -| soc-LiveJournal1 | 4,847,571 | 43,369,619 | Yes | No | -| europe_osm | 50,912,018 | 54,054,660 | No | No | -| hollywood | 1,139,905 | 57,515,616 | No | No | -| soc-twitter-2010 | 21,297,772 | 265,025,809 | No | No | - -**cit-Patents** : A citation graph that includes all citations made by patents granted between 1975 and 1999, totaling 16,522,438 citations. -**soc-LiveJournal** : A graph of the LiveJournal social network. -**europe_osm** : A graph of OpenStreetMap data for Europe. -**hollywood** : A graph of movie actors where vertices are actors, and two actors are joined by an edge whenever they appeared in a movie together. -**soc-twitter-2010** : A network of follower relationships from a snapshot of Twitter in 2010, where an edge from i to j indicates that j is a follower of i. - -_NOTE: the benchmark datasets were converted to a CSV format from their original format described in the reference URL below, and in doing so had edge weights and isolated vertices discarded._ - -## Reference -The SuiteSparse Matrix Collection (formerly the University of Florida Sparse Matrix Collection) : https://sparse.tamu.edu/ +# Cugraph test and benchmark data + +## Python + +This directory contains small public datasets in `mtx` and `csv` format used by cuGraph's python tests. Graph details: + +| Graph | V | E | Directed | Weighted | +| ------------- | ----- | ----- | -------- | -------- | +| karate | 34 | 156 | No | No | +| dolphin | 62 | 318 | No | No | +| netscience | 1,589 | 5,484 | No | Yes | + +**karate** : The graph "karate" contains the network of friendships between the 34 members of a karate club at a US university, as described by Wayne Zachary in 1977. + +**dolphin** : The graph dolphins contains an undirected social network of frequent associations between 62 dolphins in a community living off Doubtful Sound, New Zealand, as compiled by Lusseau et al. (2003). + +**netscience** : The graph netscience contains a coauthorship network of scientists working on network theory and experiment, as compiled by M. Newman in May 2006. 
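+
+For reference, a minimal sketch of how these small CSVs are typically loaded in the Python tests and
+docstring examples (it assumes `cudf` and `cugraph` are installed and that the path is given relative
+to the repository root; the string column names `'0'` and `'1'` come from the header-less CSV layout):
+
+```
+# Illustrative sketch: load one of the small test datasets into a cuGraph graph.
+import cudf
+import cugraph
+
+gdf = cudf.read_csv("datasets/karate.csv",
+                    delimiter=" ",
+                    dtype=["int32", "int32", "float32"],
+                    header=None)
+G = cugraph.Graph()
+G.from_cudf_edgelist(gdf, source="0", destination="1")
+print(G.number_of_vertices(), G.number_of_edges())
+```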
+ + + +### Modified datasets + +The datasets below were added to provide input that contains self-loops, string vertex IDs, isolated vertices, and multiple edges. + +| Graph | V | E | Directed | Weighted | self-loops | Isolated V | String V IDs | Multi-edges | +| ------------------- | ------- | ---------- | -------- | --------- | ---------- | ---------- | ------------ | ----------- | +| karate_multi_edge | 34 | 160 | No | Yes | No | No | No | Yes | +| dolphins_multi_edge | 62 | 325 | No | Yes | No | No | No | Yes | +| karate_s_loop | 34 | 160 | No | Yes | Yes | No | No | No | +| dolphins_s_loop | 62 | 321 | No | Yes | Yes | No | No | No | +| karate_mod | 37 | 156 | No | No | No | Yes | No | No | +| karate_str | 34 | 156 | No | Yes | No | No | Yes | No | + +**karate_multi_edge** : The graph "karate_multi_edge" is a modified version of the "karate" graph where multi-edges were added + +**dolphins_multi_edge** : The graph "dolphins_multi_edge" is a modified version of the "dolphin" graph where multi-edges were added + +**karate_s_loop** : The graph "karate_s_loop" is a modified version of the "karate" graph where self-loops were added + +**dolphins_s_loop** : The graph "dolphins_s_loop" is a modified version of the "dolphin" graph where self-loops were added + +**karate_mod** : The graph "karate_mod" is a modified version of the "karate" graph where vertices and edges were added + +**karate_str** : The graph "karate_str" contains the network of friendships between the 34 members of a karate club at a US university, as described by Wayne Zachary in 1977. The integer vertices were replaced by strings + + +### Additional datasets + +Larger datasets containing self-loops can be downloaded by running the provided script from the `datasets` directory using the `--self_loops` +option: +``` +cd /datasets +./get_test_data.sh --self_loops +``` +``` +/datasets/self_loops + |-ca-AstroPh (5.3M) + |-ca-CondMat (2.8M) + |-ca-GrQc (348K) + |-ca-HepTh (763K) +``` +These datasets are not currently used by any tests or benchmarks + +| Graph | V | E | Directed | Weighted | self-loops | Isolated V | String V IDs | Multi-edges | +| ------------- | ------- | -------- | -------- | -------- | ---------- | ---------- | ------------ | ----------- | +| ca-AstroPh | 18,772 | 198,110 | No | No | Yes | No | No | No | +| ca-CondMat | 23,133 | 93,497 | No | Yes | Yes | No | No | No | +| ca-GrQc | 5,242 | 14,387 | No | No | Yes | No | No | No | +| ca-HepTh | 9,877 | 25,998 | No | Yes | Yes | No | No | No | + +**ca-AstroPh** : The graph "ca-AstroPh" covers scientific collaborations between authors papers submitted to Astro Physics category in the period from January 1993 to April 2003 (124 months), as described by J. Leskovec, J. Kleinberg and C. Faloutsos in 2007. + +**ca-CondMat** : The graph "ca-CondMat" covers scientific collaborations between authors papers submitted to Condense Matter category in the period from January 1993 to April 2003 (124 months), as described by J. Leskovec, J. Kleinberg and C. Faloutsos in 2007. + +**ca-GrQc** : The graph "ca-GrQc" covers scientific collaborations between authors papers submitted to General Relativity and Quantum Cosmology category in the period from January 1993 to April 2003 (124 months), as described by J. Leskovec, J. Kleinberg and C. Faloutsos in 2007. + +**ca-HepTh** : The graph "ca-HepTh" covers scientific collaborations between authors papers submitted to High Energy Physics - Theory category in the period from January 1993 to April 2003 (124 months), as described by J. 
Leskovec, J. Kleinberg and C. Faloutsos in 2007. + + +## Custom path to larger datasets directory + +Cugraph's C++ and Python analytics tests need larger datasets (>5GB uncompressed) and reference results (>125MB uncompressed). They can be downloaded by running the provided script from the `datasets` directory. +``` +cd /datasets +./get_test_data.sh +``` +You may run this script from elsewhere and store C++ or Python test input to another location. + +Before running the tests, you should let cuGraph know where to find the test input by using: +``` +export RAPIDS_DATASET_ROOT_DIR= +``` + + +## Benchmarks + +Cugraph benchmarks (which can be found [here](../benchmarks)) also use datasets installed to this folder. Because the datasets used for benchmarking are also quite large (~14GB uncompressed), they are not installed by default. To install datasets for benchmarks, run the same script shown above from the `datasets` directory using the `--benchmark` option: +``` +cd /datasets +./get_test_data.sh --benchmark +``` +The datasets installed for benchmarks currently include CSV files for use in creating both directed and undirected graphs: +``` +/datasets/csv + |- directed + |--- cit-Patents.csv (250M) + |--- soc-LiveJournal1.csv (965M) + |- undirected + |--- europe_osm.csv (1.8G) + |--- hollywood.csv (1.5G) + |--- soc-twitter-2010.csv (8.8G) +``` +The benchmark datasets are described below: +| Graph | V | E | Directed | Weighted | +| ----------------- | ---------- | ------------- | -------- | -------- | +| cit-Patents | 3,774,768 | 16,518,948 | Yes | No | +| soc-LiveJournal1 | 4,847,571 | 43,369,619 | Yes | No | +| europe_osm | 50,912,018 | 54,054,660 | No | No | +| hollywood | 1,139,905 | 57,515,616 | No | No | +| soc-twitter-2010 | 21,297,772 | 265,025,809 | No | No | + +**cit-Patents** : A citation graph that includes all citations made by patents granted between 1975 and 1999, totaling 16,522,438 citations. +**soc-LiveJournal** : A graph of the LiveJournal social network. +**europe_osm** : A graph of OpenStreetMap data for Europe. +**hollywood** : A graph of movie actors where vertices are actors, and two actors are joined by an edge whenever they appeared in a movie together. +**soc-twitter-2010** : A network of follower relationships from a snapshot of Twitter in 2010, where an edge from i to j indicates that j is a follower of i. 
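+
+Once downloaded, these CSVs can be read with `dask_cudf` and handed to the multi-GPU graph
+constructor. The sketch below mirrors the multi-GPU example in the cuGraph documentation; it assumes
+a Dask CUDA cluster and `cugraph.comms` have already been initialized, that the script runs from the
+`datasets` directory, and that `hollywood.csv` was installed with the `--benchmark` option:
+
+```
+# Illustrative sketch: build a multi-GPU graph from a benchmark CSV and run PageRank.
+import dask_cudf
+import cugraph
+import cugraph.dask as dask_cugraph
+
+input_path = "csv/undirected/hollywood.csv"        # installed by get_test_data.sh --benchmark
+chunksize = dask_cugraph.get_chunksize(input_path)  # aims for one partition per GPU
+ddf = dask_cudf.read_csv(input_path,
+                         chunksize=chunksize,
+                         delimiter=" ",
+                         names=["src", "dst"],
+                         dtype=["int32", "int32"])
+G = cugraph.DiGraph()
+G.from_dask_cudf_edgelist(ddf, source="src", destination="dst")
+pr = dask_cugraph.pagerank(G, tol=1e-4)            # distributed PageRank, as in the docs example
+```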
+ +_NOTE: the benchmark datasets were converted to a CSV format from their original format described in the reference URL below, and in doing so had edge weights and isolated vertices discarded._ + +## Reference +The SuiteSparse Matrix Collection (formerly the University of Florida Sparse Matrix Collection) : https://sparse.tamu.edu/ +The Stanford Network Analysis Platform (SNAP) diff --git a/datasets/dolphins_multi_edge.csv b/datasets/dolphins_multi_edge.csv new file mode 100644 index 00000000000..cf6bc70918e --- /dev/null +++ b/datasets/dolphins_multi_edge.csv @@ -0,0 +1,325 @@ +10 0 1.0 +14 0 1.0 +15 0 1.0 +40 0 1.0 +42 0 1.0 +47 0 1.0 +17 1 1.0 +19 1 1.0 +26 1 1.0 +27 1 1.0 +28 1 1.0 +36 1 1.0 +41 1 1.0 +54 1 1.0 +10 2 1.0 +42 2 1.0 +44 2 1.0 +61 2 1.0 +8 3 1.0 +14 3 1.0 +59 3 1.0 +51 4 1.0 +9 5 1.0 +13 5 1.0 +56 5 1.0 +57 5 1.0 +9 6 1.0 +13 6 1.0 +17 6 1.0 +54 6 1.0 +56 6 1.0 +57 6 1.0 +19 7 1.0 +27 7 1.0 +30 7 1.0 +40 7 1.0 +54 7 1.0 +20 8 1.0 +28 8 1.0 +37 8 1.0 +45 8 1.0 +59 8 1.0 +13 9 1.0 +17 9 1.0 +32 9 1.0 +41 9 1.0 +57 9 1.0 +29 10 1.0 +42 10 1.0 +47 10 1.0 +51 11 1.0 +33 12 1.0 +17 13 1.0 +32 13 1.0 +41 13 1.0 +54 13 1.0 +57 13 1.0 +16 14 1.0 +24 14 1.0 +33 14 1.0 +34 14 1.0 +37 14 1.0 +38 14 1.0 +40 14 1.0 +43 14 1.0 +50 14 1.0 +52 14 1.0 +18 15 1.0 +24 15 1.0 +40 15 1.0 +45 15 1.0 +55 15 1.0 +59 15 1.0 +20 16 1.0 +33 16 1.0 +37 16 1.0 +38 16 1.0 +50 16 1.0 +22 17 1.0 +25 17 1.0 +27 17 1.0 +31 17 1.0 +57 17 1.0 +20 18 1.0 +21 18 1.0 +24 18 1.0 +29 18 1.0 +45 18 1.0 +51 18 1.0 +30 19 1.0 +54 19 1.0 +28 20 1.0 +36 20 1.0 +38 20 1.0 +44 20 1.0 +47 20 1.0 +50 20 1.0 +29 21 1.0 +33 21 1.0 +37 21 1.0 +45 21 1.0 +51 21 1.0 +36 23 1.0 +45 23 1.0 +51 23 1.0 +29 24 1.0 +45 24 1.0 +51 24 1.0 +26 25 1.0 +27 25 1.0 +27 26 1.0 +30 28 1.0 +47 28 1.0 +35 29 1.0 +43 29 1.0 +45 29 1.0 +51 29 1.0 +52 29 1.0 +42 30 1.0 +47 30 1.0 +60 32 1.0 +34 33 1.0 +37 33 1.0 +38 33 1.0 +40 33 1.0 +43 33 1.0 +50 33 1.0 +37 34 1.0 +44 34 1.0 +49 34 1.0 +37 36 1.0 +39 36 1.0 +40 36 1.0 +59 36 1.0 +40 37 1.0 +43 37 1.0 +45 37 1.0 +61 37 1.0 +43 38 1.0 +44 38 1.0 +52 38 1.0 +58 38 1.0 +57 39 1.0 +52 40 1.0 +54 41 1.0 +54 41 1.0 +57 41 1.0 +47 42 1.0 +50 42 1.0 +50 42 1.0 +46 43 1.0 +53 43 1.0 +50 45 1.0 +51 45 1.0 +59 45 1.0 +59 45 1.0 +49 46 1.0 +57 48 1.0 +51 50 1.0 +55 51 1.0 +61 53 1.0 +57 54 1.0 +0 10 1.0 +0 14 1.0 +0 15 1.0 +59 45 1.0 +0 40 1.0 +0 42 1.0 +0 47 1.0 +1 17 1.0 +1 19 1.0 +1 26 1.0 +1 27 1.0 +1 28 1.0 +1 36 1.0 +1 41 1.0 +1 54 1.0 +2 10 1.0 +2 42 1.0 +2 44 1.0 +2 61 1.0 +54 41 1.0 +3 8 1.0 +3 14 1.0 +3 59 1.0 +4 51 1.0 +56 6 1.0 +5 9 1.0 +5 13 1.0 +5 56 1.0 +5 57 1.0 +6 9 1.0 +6 13 1.0 +6 17 1.0 +6 54 1.0 +6 56 1.0 +6 57 1.0 +7 19 1.0 +7 27 1.0 +7 30 1.0 +7 40 1.0 +7 54 1.0 +8 20 1.0 +8 28 1.0 +8 37 1.0 +8 45 1.0 +2 61 1.0 +8 59 1.0 +9 13 1.0 +9 17 1.0 +9 32 1.0 +9 41 1.0 +9 57 1.0 +10 29 1.0 +10 42 1.0 +10 47 1.0 +11 51 1.0 +12 33 1.0 +13 17 1.0 +13 32 1.0 +13 41 1.0 +13 54 1.0 +13 57 1.0 +14 16 1.0 +14 24 1.0 +14 33 1.0 +14 34 1.0 +14 37 1.0 +14 38 1.0 +14 40 1.0 +14 43 1.0 +14 50 1.0 +14 52 1.0 +15 18 1.0 +15 24 1.0 +15 40 1.0 +15 45 1.0 +15 55 1.0 +15 59 1.0 +16 20 1.0 +16 33 1.0 +16 37 1.0 +16 38 1.0 +16 50 1.0 +17 22 1.0 +17 25 1.0 +17 27 1.0 +17 31 1.0 +17 57 1.0 +18 20 1.0 +18 21 1.0 +18 24 1.0 +18 29 1.0 +18 45 1.0 +18 51 1.0 +19 30 1.0 +19 54 1.0 +20 28 1.0 +20 36 1.0 +20 38 1.0 +20 44 1.0 +20 47 1.0 +20 50 1.0 +21 29 1.0 +21 33 1.0 +21 37 1.0 +21 45 1.0 +21 51 1.0 +23 36 1.0 +23 45 1.0 +23 51 1.0 +24 29 1.0 +24 45 1.0 +24 51 1.0 +25 26 1.0 +25 27 1.0 +26 27 1.0 +28 30 1.0 +28 47 1.0 
+29 35 1.0 +29 43 1.0 +29 45 1.0 +29 51 1.0 +29 52 1.0 +30 42 1.0 +30 47 1.0 +32 60 1.0 +33 34 1.0 +33 37 1.0 +33 38 1.0 +33 40 1.0 +33 43 1.0 +33 50 1.0 +34 37 1.0 +34 44 1.0 +34 49 1.0 +36 37 1.0 +36 39 1.0 +36 40 1.0 +36 59 1.0 +37 40 1.0 +37 43 1.0 +37 45 1.0 +37 61 1.0 +38 43 1.0 +38 44 1.0 +38 52 1.0 +38 58 1.0 +39 57 1.0 +40 52 1.0 +41 54 1.0 +41 57 1.0 +42 47 1.0 +42 50 1.0 +43 46 1.0 +43 53 1.0 +45 50 1.0 +45 51 1.0 +45 59 1.0 +46 49 1.0 +48 57 1.0 +50 51 1.0 +51 55 1.0 +53 61 1.0 +54 57 1.0 diff --git a/datasets/dolphins_s_loop.csv b/datasets/dolphins_s_loop.csv new file mode 100644 index 00000000000..703b8440afa --- /dev/null +++ b/datasets/dolphins_s_loop.csv @@ -0,0 +1,321 @@ +10 0 1.0 +14 0 1.0 +15 0 1.0 +40 0 1.0 +42 0 1.0 +47 0 1.0 +17 1 1.0 +19 1 1.0 +26 1 1.0 +27 1 1.0 +28 1 1.0 +36 1 1.0 +41 1 1.0 +54 1 1.0 +10 2 1.0 +42 2 1.0 +44 2 1.0 +61 2 1.0 +8 3 1.0 +14 3 1.0 +59 3 1.0 +51 4 1.0 +9 5 1.0 +13 5 1.0 +56 5 1.0 +57 5 1.0 +9 6 1.0 +13 6 1.0 +17 6 1.0 +54 6 1.0 +56 6 1.0 +57 6 1.0 +19 7 1.0 +27 7 1.0 +30 7 1.0 +40 7 1.0 +54 7 1.0 +20 8 1.0 +28 8 1.0 +37 8 1.0 +45 8 1.0 +59 8 1.0 +13 9 1.0 +17 9 1.0 +32 9 1.0 +41 9 1.0 +57 9 1.0 +29 10 1.0 +42 10 1.0 +47 10 1.0 +51 11 1.0 +33 12 1.0 +17 13 1.0 +32 13 1.0 +41 13 1.0 +54 13 1.0 +57 13 1.0 +16 14 1.0 +24 14 1.0 +33 14 1.0 +34 14 1.0 +37 14 1.0 +38 14 1.0 +40 14 1.0 +43 14 1.0 +50 14 1.0 +52 14 1.0 +18 15 1.0 +24 15 1.0 +40 15 1.0 +45 15 1.0 +55 15 1.0 +59 15 1.0 +20 16 1.0 +33 16 1.0 +37 16 1.0 +38 16 1.0 +50 16 1.0 +22 17 1.0 +25 17 1.0 +27 17 1.0 +31 17 1.0 +57 17 1.0 +20 18 1.0 +21 18 1.0 +24 18 1.0 +29 18 1.0 +45 18 1.0 +51 18 1.0 +30 19 1.0 +54 19 1.0 +28 20 1.0 +36 20 1.0 +38 20 1.0 +44 20 1.0 +47 20 1.0 +50 20 1.0 +29 21 1.0 +33 21 1.0 +37 21 1.0 +45 21 1.0 +51 21 1.0 +36 23 1.0 +45 23 1.0 +51 23 1.0 +29 24 1.0 +45 24 1.0 +51 24 1.0 +26 25 1.0 +27 25 1.0 +27 26 1.0 +30 28 1.0 +47 28 1.0 +35 29 1.0 +43 29 1.0 +45 29 1.0 +51 29 1.0 +52 29 1.0 +42 30 1.0 +47 30 1.0 +60 32 1.0 +34 33 1.0 +37 33 1.0 +38 33 1.0 +40 33 1.0 +43 33 1.0 +50 33 1.0 +37 34 1.0 +44 34 1.0 +49 34 1.0 +37 36 1.0 +39 36 1.0 +40 36 1.0 +59 36 1.0 +40 37 1.0 +43 37 1.0 +43 43 1.0 +45 37 1.0 +61 37 1.0 +43 38 1.0 +44 38 1.0 +52 38 1.0 +58 38 1.0 +57 39 1.0 +52 40 1.0 +52 52 1.0 +54 41 1.0 +57 41 1.0 +47 42 1.0 +50 42 1.0 +46 43 1.0 +53 43 1.0 +50 45 1.0 +51 45 1.0 +59 45 1.0 +49 46 1.0 +57 48 1.0 +51 50 1.0 +55 51 1.0 +61 53 1.0 +57 54 1.0 +0 10 1.0 +0 14 1.0 +0 15 1.0 +0 40 1.0 +0 42 1.0 +0 47 1.0 +1 17 1.0 +1 19 1.0 +1 26 1.0 +1 1 1.0 +1 27 1.0 +1 28 1.0 +1 36 1.0 +1 41 1.0 +1 54 1.0 +2 10 1.0 +2 42 1.0 +2 44 1.0 +2 61 1.0 +3 8 1.0 +3 14 1.0 +3 59 1.0 +4 51 1.0 +5 9 1.0 +5 13 1.0 +5 56 1.0 +5 57 1.0 +6 9 1.0 +6 13 1.0 +6 17 1.0 +6 54 1.0 +6 56 1.0 +6 57 1.0 +7 19 1.0 +7 27 1.0 +7 30 1.0 +7 40 1.0 +7 54 1.0 +8 20 1.0 +8 28 1.0 +8 37 1.0 +8 45 1.0 +8 59 1.0 +9 13 1.0 +9 17 1.0 +9 32 1.0 +9 41 1.0 +9 57 1.0 +10 29 1.0 +10 42 1.0 +10 47 1.0 +11 51 1.0 +12 33 1.0 +13 17 1.0 +13 32 1.0 +13 41 1.0 +13 54 1.0 +13 57 1.0 +14 16 1.0 +14 24 1.0 +14 33 1.0 +14 34 1.0 +14 37 1.0 +14 38 1.0 +14 40 1.0 +14 43 1.0 +14 50 1.0 +14 52 1.0 +15 18 1.0 +15 24 1.0 +15 40 1.0 +15 45 1.0 +15 55 1.0 +15 59 1.0 +16 20 1.0 +16 33 1.0 +16 37 1.0 +16 38 1.0 +16 50 1.0 +17 22 1.0 +17 25 1.0 +17 27 1.0 +17 31 1.0 +17 57 1.0 +18 20 1.0 +18 21 1.0 +18 24 1.0 +18 29 1.0 +18 45 1.0 +18 51 1.0 +19 30 1.0 +19 54 1.0 +20 28 1.0 +20 36 1.0 +20 38 1.0 +20 44 1.0 +20 47 1.0 +20 50 1.0 +21 29 1.0 +21 33 1.0 +21 37 1.0 +21 45 1.0 +21 51 1.0 +23 36 1.0 +23 45 1.0 +23 51 1.0 +24 29 1.0 +24 45 1.0 
+24 51 1.0 +25 26 1.0 +25 27 1.0 +26 27 1.0 +28 30 1.0 +28 47 1.0 +29 35 1.0 +29 43 1.0 +29 45 1.0 +29 51 1.0 +29 52 1.0 +30 42 1.0 +30 47 1.0 +32 60 1.0 +33 34 1.0 +33 37 1.0 +33 38 1.0 +33 40 1.0 +33 43 1.0 +33 50 1.0 +34 37 1.0 +34 44 1.0 +34 49 1.0 +36 37 1.0 +36 39 1.0 +36 40 1.0 +36 59 1.0 +37 40 1.0 +37 43 1.0 +37 45 1.0 +37 61 1.0 +38 43 1.0 +38 44 1.0 +38 52 1.0 +38 58 1.0 +39 57 1.0 +40 52 1.0 +41 54 1.0 +41 57 1.0 +42 47 1.0 +42 50 1.0 +43 46 1.0 +43 53 1.0 +45 50 1.0 +45 51 1.0 +45 59 1.0 +46 49 1.0 +48 57 1.0 +50 51 1.0 +51 55 1.0 +53 61 1.0 +54 57 1.0 diff --git a/datasets/get_test_data.sh b/datasets/get_test_data.sh index 3e0b6c55c37..0bd97b55cb5 100755 --- a/datasets/get_test_data.sh +++ b/datasets/get_test_data.sh @@ -61,6 +61,12 @@ BENCHMARK_DATASET_DATA=" https://rapidsai-data.s3.us-east-2.amazonaws.com/cugraph/benchmark/benchmark_csv_data.tgz csv " + +SELF_LOOPS_DATASET_DATA=" +# ~1s download +https://rapidsai-data.s3.us-east-2.amazonaws.com/cugraph/benchmark/benchmark_csv_data_self_loops.tgz +self_loops +" ################################################################################ # Do not change the script below this line if only adding/updating a dataset @@ -71,7 +77,7 @@ function hasArg { } if hasArg -h || hasArg --help; then - echo "$0 [--subset | --benchmark]" + echo "$0 [--subset | --benchmark | --self_loops]" exit 0 fi @@ -80,6 +86,8 @@ if hasArg "--benchmark"; then DATASET_DATA="${BENCHMARK_DATASET_DATA}" elif hasArg "--subset"; then DATASET_DATA="${BASE_DATASET_DATA}" +elif hasArg "--self_loops"; then + DATASET_DATA="${SELF_LOOPS_DATASET_DATA}" # Do not include benchmark datasets by default - too big else DATASET_DATA="${BASE_DATASET_DATA} ${EXTENDED_DATASET_DATA}" diff --git a/datasets/karate_mod.mtx b/datasets/karate_mod.mtx new file mode 100644 index 00000000000..3a562406800 --- /dev/null +++ b/datasets/karate_mod.mtx @@ -0,0 +1,81 @@ +2 1 +3 1 +4 1 +5 1 +6 1 +7 1 +8 1 +9 1 +11 1 +12 1 +13 1 +14 1 +18 1 +20 1 +22 1 +32 1 +3 2 +4 2 +8 2 +14 2 +18 2 +20 2 +22 2 +31 2 +4 3 +8 3 +9 3 +10 3 +14 3 +28 3 +29 3 +33 3 +8 4 +13 4 +14 4 +7 5 +11 5 +7 6 +11 6 +17 6 +17 7 +31 9 +33 9 +34 9 +34 10 +34 14 +33 15 +34 15 +33 16 +34 16 +33 19 +34 19 +34 20 +33 21 +34 21 +33 23 +34 23 +26 24 +28 24 +30 24 +33 24 +34 24 +26 25 +28 25 +32 25 +32 26 +30 27 +34 27 +34 28 +32 29 +34 29 +33 30 +34 30 +33 31 +34 31 +33 32 +34 32 +34 33 +35 +36 +37 diff --git a/datasets/karate_multi_edge.csv b/datasets/karate_multi_edge.csv new file mode 100644 index 00000000000..6f331b77a59 --- /dev/null +++ b/datasets/karate_multi_edge.csv @@ -0,0 +1,160 @@ +1 0 1.0 +2 0 1.0 +3 0 1.0 +4 0 1.0 +5 0 1.0 +6 0 1.0 +7 0 1.0 +8 0 1.0 +10 0 1.0 +11 0 1.0 +12 0 1.0 +13 0 1.0 +17 0 1.0 +19 0 1.0 +21 0 1.0 +31 0 1.0 +2 1 1.0 +3 1 1.0 +7 1 1.0 +13 1 1.0 +7 0 1.0 +17 1 1.0 +19 1 1.0 +21 1 1.0 +30 1 1.0 +3 2 1.0 +7 2 1.0 +8 2 1.0 +9 2 1.0 +13 2 1.0 +27 2 1.0 +28 2 1.0 +32 2 1.0 +7 3 1.0 +12 3 1.0 +13 3 1.0 +6 4 1.0 +10 4 1.0 +6 5 1.0 +10 5 1.0 +16 5 1.0 +16 6 1.0 +30 8 1.0 +32 8 1.0 +33 8 1.0 +28 2 1.0 +33 9 1.0 +33 13 1.0 +32 14 1.0 +33 14 1.0 +32 15 1.0 +33 15 1.0 +32 18 1.0 +33 18 1.0 +33 19 1.0 +32 20 1.0 +33 20 1.0 +32 22 1.0 +33 22 1.0 +25 23 1.0 +27 23 1.0 +29 23 1.0 +32 23 1.0 +33 23 1.0 +25 24 1.0 +27 24 1.0 +31 24 1.0 +31 25 1.0 +29 26 1.0 +33 26 1.0 +33 27 1.0 +31 28 1.0 +33 28 1.0 +32 29 1.0 +33 29 1.0 +32 22 1.0 +32 30 1.0 +33 30 1.0 +32 31 1.0 +33 31 1.0 +33 32 1.0 +0 1 1.0 +0 2 1.0 +0 3 1.0 +0 4 1.0 +0 5 1.0 +0 6 1.0 +0 7 1.0 +0 8 1.0 +0 10 1.0 +0 11 1.0 +0 12 1.0 +0 6 1.0 +0 13 1.0 +0 17 
1.0 +0 19 1.0 +0 21 1.0 +0 31 1.0 +1 2 1.0 +1 3 1.0 +1 7 1.0 +1 13 1.0 +1 17 1.0 +1 19 1.0 +1 21 1.0 +1 30 1.0 +2 3 1.0 +2 7 1.0 +2 8 1.0 +2 9 1.0 +2 13 1.0 +2 27 1.0 +2 28 1.0 +2 32 1.0 +3 7 1.0 +3 12 1.0 +3 13 1.0 +4 6 1.0 +4 10 1.0 +5 6 1.0 +5 10 1.0 +5 16 1.0 +6 16 1.0 +8 30 1.0 +8 32 1.0 +8 33 1.0 +9 33 1.0 +13 33 1.0 +14 32 1.0 +14 33 1.0 +15 32 1.0 +15 33 1.0 +18 32 1.0 +18 33 1.0 +19 33 1.0 +20 32 1.0 +20 33 1.0 +22 32 1.0 +22 33 1.0 +23 25 1.0 +23 27 1.0 +23 29 1.0 +23 32 1.0 +23 33 1.0 +24 25 1.0 +24 27 1.0 +24 31 1.0 +25 31 1.0 +26 29 1.0 +26 33 1.0 +27 33 1.0 +28 31 1.0 +28 33 1.0 +29 32 1.0 +29 33 1.0 +30 32 1.0 +30 33 1.0 +31 32 1.0 +31 33 1.0 +32 33 1.0 diff --git a/datasets/karate_s_loop.csv b/datasets/karate_s_loop.csv new file mode 100644 index 00000000000..3959e5f98b3 --- /dev/null +++ b/datasets/karate_s_loop.csv @@ -0,0 +1,160 @@ +1 0 1.0 +2 0 1.0 +3 0 1.0 +4 0 1.0 +5 0 1.0 +6 0 1.0 +7 0 1.0 +8 0 1.0 +10 0 1.0 +11 0 1.0 +12 0 1.0 +13 0 1.0 +17 0 1.0 +19 0 1.0 +21 0 1.0 +31 0 1.0 +2 1 1.0 +3 1 1.0 +7 1 1.0 +13 1 1.0 +17 1 1.0 +19 1 1.0 +21 1 1.0 +30 1 1.0 +3 2 1.0 +7 2 1.0 +8 2 1.0 +9 2 1.0 +13 2 1.0 +27 2 1.0 +28 2 1.0 +32 2 1.0 +7 3 1.0 +12 3 1.0 +13 3 1.0 +6 4 1.0 +10 4 1.0 +6 5 1.0 +10 5 1.0 +10 10 1.0 +16 5 1.0 +16 6 1.0 +30 8 1.0 +32 8 1.0 +33 8 1.0 +33 9 1.0 +33 13 1.0 +32 14 1.0 +33 14 1.0 +32 15 1.0 +33 15 1.0 +32 18 1.0 +33 18 1.0 +33 19 1.0 +32 20 1.0 +33 20 1.0 +32 22 1.0 +33 22 1.0 +25 23 1.0 +27 23 1.0 +29 23 1.0 +32 23 1.0 +33 23 1.0 +25 24 1.0 +27 24 1.0 +31 24 1.0 +31 25 1.0 +29 26 1.0 +33 26 1.0 +33 27 1.0 +31 28 1.0 +33 28 1.0 +32 29 1.0 +33 29 1.0 +32 30 1.0 +33 30 1.0 +32 31 1.0 +33 31 1.0 +33 32 1.0 +0 1 1.0 +0 2 1.0 +0 3 1.0 +0 4 1.0 +0 5 1.0 +0 6 1.0 +0 7 1.0 +0 8 1.0 +0 10 1.0 +0 11 1.0 +0 12 1.0 +0 13 1.0 +0 17 1.0 +0 19 1.0 +0 21 1.0 +0 31 1.0 +1 2 1.0 +1 3 1.0 +1 7 1.0 +1 13 1.0 +1 1 1.0 +1 17 1.0 +1 19 1.0 +1 21 1.0 +1 30 1.0 +2 3 1.0 +2 7 1.0 +2 8 1.0 +2 9 1.0 +2 13 1.0 +2 27 1.0 +2 28 1.0 +2 32 1.0 +3 7 1.0 +3 12 1.0 +3 13 1.0 +4 6 1.0 +4 10 1.0 +5 6 1.0 +5 10 1.0 +5 16 1.0 +6 16 1.0 +8 30 1.0 +8 32 1.0 +8 33 1.0 +9 33 1.0 +13 33 1.0 +13 13 1.0 +14 32 1.0 +14 33 1.0 +15 32 1.0 +15 33 1.0 +18 32 1.0 +18 33 1.0 +19 33 1.0 +20 32 1.0 +20 33 1.0 +22 32 1.0 +22 33 1.0 +23 25 1.0 +23 27 1.0 +23 29 1.0 +23 32 1.0 +23 33 1.0 +24 25 1.0 +24 27 1.0 +24 31 1.0 +25 31 1.0 +26 29 1.0 +26 33 1.0 +27 33 1.0 +28 31 1.0 +28 33 1.0 +29 32 1.0 +29 33 1.0 +30 32 1.0 +30 33 1.0 +31 32 1.0 +31 31 1.0 +31 33 1.0 +32 33 1.0 diff --git a/datasets/karate_str.mtx b/datasets/karate_str.mtx new file mode 100644 index 00000000000..0564d30f91d --- /dev/null +++ b/datasets/karate_str.mtx @@ -0,0 +1,78 @@ +9q a9 1 +ts a9 1 +kt a9 1 +j7 a9 1 +wr a9 1 +n3 a9 1 +2w a9 1 +8a a9 1 +ci a9 1 +cq a9 1 +ca a9 1 +gd a9 1 +y4 a9 1 +kx a9 1 +u3 a9 1 +id a9 1 +ts 9q 1 +kt 9q 1 +2w 9q 1 +gd 9q 1 +y4 9q 1 +kx 9q 1 +u3 9q 1 +7p 9q 1 +kt ts 1 +2w ts 1 +8a ts 1 +ax ts 1 +gd ts 1 +84 ts 1 +ar ts 1 +05 ts 1 +2w kt 1 +ca kt 1 +gd kt 1 +n3 j7 1 +ci j7 1 +n3 wr 1 +ci wr 1 +27 wr 1 +27 n3 1 +7p 8a 1 +05 8a 1 +ux 8a 1 +ux ax 1 +ux gd 1 +05 r9 1 +ux r9 1 +05 44 1 +ux 44 1 +05 a6 1 +ux a6 1 +ux kx 1 +05 d5 1 +ux d5 1 +05 gk 1 +ux gk 1 +fo em 1 +84 em 1 +wc em 1 +05 em 1 +ux em 1 +fo 1j 1 +84 1j 1 +id 1j 1 +id fo 1 +wc nm 1 +ux nm 1 +ux 84 1 +id ar 1 +ux ar 1 +05 wc 1 +ux wc 1 +05 7p 1 +ux 7p 1 +05 id 1 +ux id 1 +ux 05 1 diff --git a/python/cugraph/tests/utils.py b/python/cugraph/tests/utils.py index c2c14e0c02d..56e90b1f6bb 100755 --- a/python/cugraph/tests/utils.py +++ 
b/python/cugraph/tests/utils.py @@ -61,6 +61,21 @@ "netscience.csv"] ] +DATASETS_MULTI_EDGES = [PurePath(RAPIDS_DATASET_ROOT_DIR)/f for f in [ + "karate_multi_edge.csv", + "dolphins_multi_edge.csv"] +] + +DATASETS_STR_ISLT_V = [PurePath(RAPIDS_DATASET_ROOT_DIR)/f for f in [ + "karate_mod.mtx", + "karate_str.mtx"] +] + +DATASETS_SELF_LOOPS = [PurePath(RAPIDS_DATASET_ROOT_DIR)/f for f in [ + "karate_s_loop.csv", + "dolphins_s_loop.csv"] +] + # '../datasets/email-Eu-core.csv'] From 254a999fab9e925e7444c968c3942a5ef6e48d8d Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Mon, 22 Mar 2021 17:02:03 -0500 Subject: [PATCH 27/51] Added cmake function and .hpp template for generating version_config.hpp file. (#1476) Adds cmake function and .hpp template for generating a `version_config.hpp` file, similar to RMM's file of the same name. This allows C++ clients to include the file from the libcugraph install to query version information for reporting, checking compatibility, etc. Tested by building and installing libcugraph and checking that `version_config.hpp` was present in the conda environment and contained the correct information. closes #1472 FYI @anaruse Authors: - Rick Ratzel (@rlratzel) Approvers: - Brad Rees (@BradReesWork) - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1476 --- cpp/CMakeLists.txt | 7 +++++++ cpp/cmake/Modules/Version.cmake | 18 ++++++++++++++++++ cpp/cmake/version_config.hpp.in | 20 ++++++++++++++++++++ 3 files changed, 45 insertions(+) create mode 100644 cpp/cmake/Modules/Version.cmake create mode 100644 cpp/cmake/version_config.hpp.in diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 26a8f98e265..34ea935e31d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -18,6 +18,10 @@ cmake_minimum_required(VERSION 3.18...3.18 FATAL_ERROR) project(CUGRAPH VERSION 0.19.0 LANGUAGES C CXX CUDA) +# Write the version header +include(cmake/Modules/Version.cmake) +write_version() + ################################################################################################### # - build type ------------------------------------------------------------------------------------ @@ -560,6 +564,9 @@ install(TARGETS cugraph LIBRARY install(DIRECTORY include/ DESTINATION include/cugraph) +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cugraph/version_config.hpp + DESTINATION include/cugraph) + install(DIRECTORY ${RAFT_DIR}/cpp/include/raft/ DESTINATION include/cugraph/raft) ################################################################################################### diff --git a/cpp/cmake/Modules/Version.cmake b/cpp/cmake/Modules/Version.cmake new file mode 100644 index 00000000000..15046784175 --- /dev/null +++ b/cpp/cmake/Modules/Version.cmake @@ -0,0 +1,18 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+ +# Generate version_config.hpp from the version found in CMakeLists.txt +function(write_version) + message(STATUS "CUGRAPH VERSION: ${CUGRAPH_VERSION}") + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/version_config.hpp.in + ${CMAKE_CURRENT_BINARY_DIR}/include/cugraph/version_config.hpp @ONLY) +endfunction(write_version) diff --git a/cpp/cmake/version_config.hpp.in b/cpp/cmake/version_config.hpp.in new file mode 100644 index 00000000000..c669d1b97f3 --- /dev/null +++ b/cpp/cmake/version_config.hpp.in @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#define CUGRAPH_VERSION_MAJOR @CUGRAPH_VERSION_MAJOR@ +#define CUGRAPH_VERSION_MINOR @CUGRAPH_VERSION_MINOR@ +#define CUGRAPH_VERSION_PATCH @CUGRAPH_VERSION_PATCH@ From 7256f329773afe93deecd43f3cea3eceefc7fed1 Mon Sep 17 00:00:00 2001 From: Andrei Schaffer <37386037+aschaffer@users.noreply.github.com> Date: Tue, 23 Mar 2021 14:02:14 -0500 Subject: [PATCH 28/51] Fix for bug in SCC on self-loops (#1475) This provides fixes for strongly connected components on graphs with self-loops: https://github.com/rapidsai/cugraph/issues/1471. closes #1471 Authors: - Andrei Schaffer (@aschaffer) Approvers: - Brad Rees (@BradReesWork) - Rick Ratzel (@rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1475 --- cpp/src/components/connectivity.cu | 2 +- cpp/src/components/scc_matrix.cuh | 119 +++++++------ cpp/tests/components/scc_test.cu | 275 ++++++++++++++++++++++++++--- 3 files changed, 310 insertions(+), 86 deletions(-) diff --git a/cpp/src/components/connectivity.cu b/cpp/src/components/connectivity.cu index f4c7bf1d35c..09412160b37 100644 --- a/cpp/src/components/connectivity.cu +++ b/cpp/src/components/connectivity.cu @@ -78,7 +78,7 @@ std::enable_if_t::value> connected_components_impl( stream); } else { SCC_Data sccd(nrows, graph.offsets, graph.indices); - sccd.run_scc(labels); + auto num_iters = sccd.run_scc(labels); } } } // namespace detail diff --git a/cpp/src/components/scc_matrix.cuh b/cpp/src/components/scc_matrix.cuh index 801f1fe0fad..c7f4506b74e 100644 --- a/cpp/src/components/scc_matrix.cuh +++ b/cpp/src/components/scc_matrix.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -71,12 +71,13 @@ struct SCC_Data { p_d_r_o_(p_d_r_o), p_d_c_i_(p_d_c_i), d_C(nrows * nrows, 0), - d_Cprev(nrows * nrows, 0) + d_Cprev(nrows * nrows, 0), + p_d_C_(d_C.data().get()) { init(); } - const thrust::device_vector& get_C(void) const { return d_C; } + ByteT const* get_Cptr(void) const { return p_d_C_; } size_t nrows(void) const { return nrows_; } @@ -100,13 +101,12 @@ struct SCC_Data { void get_labels(IndexT* d_labels) const { - auto* p_d_C = d_C.data().get(); - size_t n = nrows_; // for lambda capture, since I cannot capture `this` (host), or `nrows_` + size_t n = nrows_; // for lambda capture, since I cannot capture `this` (host), or `nrows_` thrust::transform(thrust::device, thrust::make_counting_iterator(0), thrust::make_counting_iterator(nrows_), d_labels, - [n, p_d_C] __device__(IndexT k) { + [n, p_d_C = p_d_C_] __device__(IndexT k) { auto begin = p_d_C + k * n; auto end = begin + n; ByteT one{1}; @@ -124,7 +124,6 @@ struct SCC_Data { size_t nrows = nrows_; size_t count = 0; - ByteT* p_d_C = d_C.data().get(); ByteT* p_d_Cprev = get_Cprev().data().get(); size_t n2 = nrows * nrows; @@ -136,57 +135,60 @@ struct SCC_Data { do { flag.set(0); - thrust::for_each(thrust::device, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(n2), - [nrows, p_d_C, p_d_Cprev, p_d_flag, p_d_ro, p_d_ci] __device__(size_t indx) { - ByteT one{1}; - - auto i = indx / nrows; - auto j = indx % nrows; - - if ((i == j) || (p_d_Cprev[indx] == one)) - p_d_C[indx] = one; - else { - // this is where a hash-map could help: - // only need hashmap[(i,j)]={0,1} (`1` for "hit"); - // and only for new entries! - // already existent entries are covered by - // the `if`-branch above! - // Hence, hashmap[] can use limited space: - // M = max_l{number(new `1` entries)}, where - // l = #iterations in the do-loop! - // M ~ new `1` entries between A^k and A^{k+1}, - // k=1,2,... - // Might M actually be M ~ nnz(A) = |E| ?! - // Probably, because the primitive hash - //(via find_if) uses a search space of nnz(A) - // - // But, what if more than 1 entry pops-up in a row? - // Not an issue! Because the hash key is (i,j), and no - // more than one entry can exist in position (i,j)! - // - // And remember, we only need to store the new (i,j) keys - // that an iteration produces wrt to the previous iteration! - // - auto begin = p_d_ci + p_d_ro[i]; - auto end = p_d_ci + p_d_ro[i + 1]; - auto pos = thrust::find_if( - thrust::seq, begin, end, [one, j, nrows, p_d_Cprev, p_d_ci](IndexT k) { - return (p_d_Cprev[k * nrows + j] == one); - }); - - if (pos != end) p_d_C[indx] = one; - } - - if (p_d_C[indx] != p_d_Cprev[indx]) - *p_d_flag = 1; // race-condition: harmless, worst case many threads - // write the same value - }); + thrust::for_each( + thrust::device, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(n2), + [nrows, p_d_C = p_d_C_, p_d_Cprev, p_d_flag, p_d_ro, p_d_ci] __device__(size_t indx) { + ByteT one{1}; + + auto i = indx / nrows; + auto j = indx % nrows; + + if ((i == j) || (p_d_Cprev[indx] == one)) { + p_d_C[indx] = one; + } else { + // this ammounts to A (^,v) B + // (where A = adjacency matrix defined by (p_ro, p_ci), + // B := p_d_Cprev; (^,v) := (*,+) semiring); + // Here's why: + // (A (^,v) B)[i][j] := A[i][.] (^,v) B[j][.] + // (where X[i][.] := i-th row of X; + // X[.][j] := j-th column of X); + // which is: + // 1, iff A[i][.] and B[j][.] 
have a 1 in the same location, + // 0, otherwise; + // + // i.e., corresponfing entry in p_d_C is 1 + // if B[k][j] == 1 for any column k in A's i-th row; + // hence, for each column k of row A[i][.], + // which is the set: + // k \in {p_ci + p_ro[i], ..., p_ci + p_ro[i+1] - 1}, + // check if (B[k][j] == 1), + // i.e., p_d_Cprev[k*nrows + j]) == 1: + // + auto begin = p_d_ci + p_d_ro[i]; + auto end = p_d_ci + p_d_ro[i + 1]; + auto pos = thrust::find_if( + thrust::seq, begin, end, [one, j, nrows, p_d_Cprev, p_d_ci](IndexT k) { + return (p_d_Cprev[k * nrows + j] == one); + }); + + if (pos != end) p_d_C[indx] = one; + } + + if (p_d_C[indx] != p_d_Cprev[indx]) + *p_d_flag = 1; // race-condition: harmless, + // worst case many threads + // write the _same_ value + }); ++count; cudaDeviceSynchronize(); - std::swap(p_d_C, p_d_Cprev); + std::swap(p_d_C_, p_d_Cprev); // Note 1: this swap makes `p_d_Cprev` the + // most recently updated matrix pointer + // at the end of this loop + // (see `Note 2` why this matters); } while (flag.is_set()); // C & Ct: @@ -196,11 +198,13 @@ struct SCC_Data { thrust::for_each(thrust::device, thrust::make_counting_iterator(0), thrust::make_counting_iterator(n2), - [nrows, p_d_C, p_d_Cprev] __device__(size_t indx) { + [nrows, p_d_C = p_d_C_, p_d_Cprev] __device__(size_t indx) { auto i = indx / nrows; auto j = indx % nrows; auto tindx = j * nrows + i; + // Note 2: per Note 1, p_d_Cprev is latest: + // p_d_C[indx] = (p_d_Cprev[indx]) & (p_d_Cprev[tindx]); }); @@ -215,6 +219,9 @@ struct SCC_Data { const IndexT* p_d_c_i_; // column indices thrust::device_vector d_C; thrust::device_vector d_Cprev; + ByteT* p_d_C_{nullptr}; // holds the most recent update, + // which can have storage in any of d_C or d_Cprev, + // because the pointers get swapped! thrust::device_vector& get_Cprev(void) { return d_Cprev; } }; diff --git a/cpp/tests/components/scc_test.cu b/cpp/tests/components/scc_test.cu index 9d5b55f34c6..a74b5a0ad27 100644 --- a/cpp/tests/components/scc_test.cu +++ b/cpp/tests/components/scc_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -24,6 +24,9 @@ #include +#include +#include +#include #include #include @@ -57,41 +60,48 @@ struct Usecase { std::string matrix_file; }; -// checker of counts of labels for each component -// expensive, for testing purposes only; +// counts number of vertices in each component; +// (of same label); +// potentially expensive, for testing purposes only; // // params: -// p_d_labels: device array of labels of size nrows; -// nrows: |V| for graph G(V, E); -// d_v_counts: #labels for each component; (_not_ pre-allocated!) +// in: p_d_labels: device array of labels of size nrows; +// in: nrows: |V| for graph G(V, E); +// out: d_v_counts: #labels for each component; (_not_ pre-allocated!) 
+// return: number of components; // template -size_t get_component_sizes(const IndexT* p_d_labels, size_t nrows, DVector& d_v_counts) +size_t get_component_sizes(const IndexT* p_d_labels, + size_t nrows, + DVector& d_num_vs_per_component) { DVector d_sorted_l(p_d_labels, p_d_labels + nrows); thrust::sort(d_sorted_l.begin(), d_sorted_l.end()); - size_t counts = - thrust::distance(d_sorted_l.begin(), thrust::unique(d_sorted_l.begin(), d_sorted_l.end())); + auto pair_it = thrust::reduce_by_key(d_sorted_l.begin(), + d_sorted_l.end(), + thrust::make_constant_iterator(1), + thrust::make_discard_iterator(), // ignore... + d_num_vs_per_component.begin()); - IndexT* p_d_srt_l = d_sorted_l.data().get(); - - d_v_counts.resize(counts); - thrust::transform( - thrust::device, - d_sorted_l.begin(), - d_sorted_l.begin() + counts, - d_v_counts.begin(), - [p_d_srt_l, counts] __device__(IndexT indx) { - return thrust::count_if( - thrust::seq, p_d_srt_l, p_d_srt_l + counts, [indx](IndexT label) { return label == indx; }); - }); - - // sort the counts: - thrust::sort(d_v_counts.begin(), d_v_counts.end()); + size_t counts = thrust::distance(d_num_vs_per_component.begin(), pair_it.second); + d_num_vs_per_component.resize(counts); return counts; } + +template +DVector byte_matrix_to_int(const DVector& d_adj_byte_matrix) +{ + auto n2 = d_adj_byte_matrix.size(); + thrust::device_vector d_vec_matrix(n2, 0); + thrust::transform(d_adj_byte_matrix.begin(), + d_adj_byte_matrix.end(), + d_vec_matrix.begin(), + [] __device__(auto byte_v) { return static_cast(byte_v); }); + return d_vec_matrix; +} + } // namespace struct Tests_Strongly_CC : ::testing::TestWithParam { @@ -154,8 +164,8 @@ struct Tests_Strongly_CC : ::testing::TestWithParam { // Allocate memory on host std::vector cooRowInd(nnz); std::vector cooColInd(nnz); - std::vector labels(m); // for G(V, E), m := |V| - std::vector verts(m); + std::vector labels(nrows); // for G(V, E), m := |V| + std::vector verts(nrows); // Read: COO Format // @@ -166,11 +176,11 @@ struct Tests_Strongly_CC : ::testing::TestWithParam { << "\n"; ASSERT_EQ(fclose(fpin), 0); - cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, m, nnz); + cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, nrows, nnz); auto G_unique = cugraph::coo_to_csr(G_coo); cugraph::GraphCSRView G = G_unique->view(); - rmm::device_vector d_labels(m); + rmm::device_vector d_labels(nrows); size_t count = 0; @@ -190,7 +200,7 @@ struct Tests_Strongly_CC : ::testing::TestWithParam { } strongly_cc_counts.push_back(count); - DVector d_counts; + DVector d_counts(nrows); auto count_labels = get_component_sizes(d_labels.data().get(), nrows, d_counts); } }; @@ -208,4 +218,211 @@ INSTANTIATE_TEST_CASE_P( Usecase("test/datasets/cage6.mtx") // DG "small" enough to meet SCC GPU memory requirements )); +struct SCCSmallTest : public ::testing::Test { +}; + +// FIXME: we should take advantage of gtest parameterization over copy-and-paste reuse. 
+// +TEST_F(SCCSmallTest, CustomGraphSimpleLoops) +{ + using IndexT = int; + + size_t nrows = 5; + size_t n2 = 2 * nrows * nrows; + + cudaDeviceProp prop; + int device = 0; + cudaGetDeviceProperties(&prop, device); + + ASSERT_TRUE(n2 < prop.totalGlobalMem); + + // Allocate memory on host + std::vector cooRowInd{0, 1, 2, 3, 3, 4}; + std::vector cooColInd{1, 0, 0, 1, 4, 3}; + std::vector labels(nrows); + std::vector verts(nrows); + + size_t nnz = cooRowInd.size(); + + EXPECT_EQ(nnz, cooColInd.size()); + + cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, nrows, nnz); + auto G_unique = cugraph::coo_to_csr(G_coo); + cugraph::GraphCSRView G = G_unique->view(); + + rmm::device_vector d_labels(nrows); + + cugraph::connected_components(G, cugraph::cugraph_cc_t::CUGRAPH_STRONG, d_labels.data().get()); + + DVector d_counts(nrows); + auto count_components = get_component_sizes(d_labels.data().get(), nrows, d_counts); + + EXPECT_EQ(count_components, static_cast(3)); + + std::vector v_counts(d_counts.size()); + + cudaMemcpy(v_counts.data(), + d_counts.data().get(), + sizeof(size_t) * v_counts.size(), + cudaMemcpyDeviceToHost); + + cudaDeviceSynchronize(); + + std::vector v_counts_exp{2, 1, 2}; + + EXPECT_EQ(v_counts, v_counts_exp); +} + +TEST_F(SCCSmallTest, /*DISABLED_*/ CustomGraphWithSelfLoops) +{ + using IndexT = int; + + size_t nrows = 5; + size_t n2 = 2 * nrows * nrows; + + cudaDeviceProp prop; + int device = 0; + cudaGetDeviceProperties(&prop, device); + + ASSERT_TRUE(n2 < prop.totalGlobalMem); + + // Allocate memory on host + std::vector cooRowInd{0, 0, 1, 1, 2, 2, 3, 3, 4}; + std::vector cooColInd{0, 1, 0, 1, 0, 2, 1, 3, 4}; + std::vector labels(nrows); + std::vector verts(nrows); + + size_t nnz = cooRowInd.size(); + + EXPECT_EQ(nnz, cooColInd.size()); + + cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, nrows, nnz); + auto G_unique = cugraph::coo_to_csr(G_coo); + cugraph::GraphCSRView G = G_unique->view(); + + rmm::device_vector d_labels(nrows); + + cugraph::connected_components(G, cugraph::cugraph_cc_t::CUGRAPH_STRONG, d_labels.data().get()); + + DVector d_counts(nrows); + auto count_components = get_component_sizes(d_labels.data().get(), nrows, d_counts); + + EXPECT_EQ(count_components, static_cast(4)); + + std::vector v_counts(d_counts.size()); + + cudaMemcpy(v_counts.data(), + d_counts.data().get(), + sizeof(size_t) * v_counts.size(), + cudaMemcpyDeviceToHost); + + cudaDeviceSynchronize(); + + std::vector v_counts_exp{2, 1, 1, 1}; + + EXPECT_EQ(v_counts, v_counts_exp); +} + +TEST_F(SCCSmallTest, SmallGraphWithSelfLoops1) +{ + using IndexT = int; + + size_t nrows = 3; + + std::vector cooRowInd{0, 0, 1, 2}; + std::vector cooColInd{0, 1, 0, 0}; + + std::vector v_counts_exp{2, 1}; + + std::vector labels(nrows); + std::vector verts(nrows); + + size_t nnz = cooRowInd.size(); + + EXPECT_EQ(nnz, cooColInd.size()); + + cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, nrows, nnz); + auto G_unique = cugraph::coo_to_csr(G_coo); + cugraph::GraphCSRView G = G_unique->view(); + + rmm::device_vector d_labels(nrows); + + cugraph::connected_components(G, cugraph::cugraph_cc_t::CUGRAPH_STRONG, d_labels.data().get()); + + DVector d_counts(nrows); + auto count_components = get_component_sizes(d_labels.data().get(), nrows, d_counts); + + // std::cout << "vertex labels:\n"; + // print_v(d_labels, std::cout); + + decltype(count_components) num_components_exp = 2; + + EXPECT_EQ(count_components, num_components_exp); +} + +TEST_F(SCCSmallTest, 
SmallGraphWithIsolated) +{ + using IndexT = int; + + size_t nrows = 3; + + std::vector cooRowInd{0, 0, 1}; + std::vector cooColInd{0, 1, 0}; + + std::vector v_counts_exp{2, 1}; + + std::vector labels(nrows); + std::vector verts(nrows); + + size_t nnz = cooRowInd.size(); + + EXPECT_EQ(nnz, cooColInd.size()); + + // Note: there seems to be a BUG in coo_to_csr() or view() + // COO format doesn't account for isolated vertices; + // + // cugraph::GraphCOOView G_coo(&cooRowInd[0], &cooColInd[0], nullptr, nrows, + // nnz); + // auto G_unique = cugraph::coo_to_csr(G_coo); + // cugraph::GraphCSRView G = G_unique->view(); + // + // + // size_t num_vertices = G.number_of_vertices; + // size_t num_edges = G.number_of_edges; + // + // EXPECT_EQ(num_vertices, nrows); //fails when G was constructed from COO + // EXPECT_EQ(num_edges, nnz); + + std::vector ro{0, 2, 3, 3}; + std::vector ci{0, 1, 0}; + + nnz = ci.size(); + + thrust::device_vector d_ro(ro); + thrust::device_vector d_ci(ci); + + cugraph::GraphCSRView G{ + d_ro.data().get(), d_ci.data().get(), nullptr, static_cast(nrows), static_cast(nnz)}; + + size_t num_vertices = G.number_of_vertices; + size_t num_edges = G.number_of_edges; + + EXPECT_EQ(num_vertices, nrows); + EXPECT_EQ(num_edges, nnz); + + rmm::device_vector d_labels(nrows); + + cugraph::connected_components(G, cugraph::cugraph_cc_t::CUGRAPH_STRONG, d_labels.data().get()); + + DVector d_counts(nrows); + auto count_components = get_component_sizes(d_labels.data().get(), nrows, d_counts); + + // std::cout << "vertex labels:\n"; + // print_v(d_labels, std::cout); + + decltype(count_components) num_components_exp = 2; + + EXPECT_EQ(count_components, num_components_exp); +} + CUGRAPH_TEST_PROGRAM_MAIN() From 76fad0e2980ddce5a24cc55da94946b511b56a38 Mon Sep 17 00:00:00 2001 From: Dillon Cullinan Date: Thu, 25 Mar 2021 22:50:31 -0400 Subject: [PATCH 29/51] ENH Change conda build directories to work with ccache (#1452) This updates the default conda build directory to a custom path for gpuCI. Small changes to artifact paths as well to make this compatible with Project Flash. 
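For reference, the pattern the changed scripts rely on can be sketched roughly as follows; CONDA_BLD_DIR and the --no-build-id/--croot flags are taken from the scripts in this patch, while WORKSPACE is assumed to be provided by the CI environment and the CCACHE_DIR line is only an illustrative assumption about where the compiler cache persists:

    # Sketch only, not the exact gpuCI configuration
    export CONDA_BLD_DIR="${WORKSPACE}/.conda-bld"   # fixed conda-build root inside the workspace
    export CCACHE_DIR="${WORKSPACE}/.ccache"         # assumption: persist ccache output between jobs
    # --no-build-id and --croot keep build paths stable across runs so ccache can hit
    conda build --no-build-id --croot "${CONDA_BLD_DIR}" conda/recipes/libcugraph

Keeping the build root at a predictable path is what lets ccache recognize repeated compiler invocations; the time-stamped per-build work directories that conda-build normally creates can otherwise reduce cache hits.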
Authors: - Dillon Cullinan (@dillon-cullinan) Approvers: - AJ Schmidt (@ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1452 --- ci/cpu/build.sh | 11 +++++++---- ci/cpu/upload.sh | 5 +++-- ci/gpu/build.sh | 5 +++-- conda/recipes/cugraph/meta.yaml | 2 +- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index d69448cda4e..8d12b10a640 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -26,6 +26,7 @@ export GPUCI_CONDA_RETRY_SLEEP=30 # Use Ninja to build export CMAKE_GENERATOR="Ninja" +export CONDA_BLD_DIR="${WORKSPACE}/.conda-bld" ################################################################################ # SETUP - Check environment @@ -58,18 +59,20 @@ conda config --set ssl_verify False gpuci_logger "Build conda pkg for libcugraph" if [ "$BUILD_LIBCUGRAPH" == '1' ]; then if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then - conda build conda/recipes/libcugraph + gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcugraph else - conda build --dirty --no-remove-work-dir conda/recipes/libcugraph + gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} --dirty --no-remove-work-dir conda/recipes/libcugraph + mkdir -p ${CONDA_BLD_DIR}/libcugraph/work + cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcugraph/work fi fi gpuci_logger "Build conda pkg for cugraph" if [ "$BUILD_CUGRAPH" == "1" ]; then if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then - conda build conda/recipes/cugraph --python=$PYTHON + gpuci_conda_retry build --croot ${CONDA_BLD_DIR} conda/recipes/cugraph --python=$PYTHON else - conda build conda/recipes/cugraph -c ci/artifacts/cugraph/cpu/conda-bld/ --dirty --no-remove-work-dir --python=$PYTHON + gpuci_conda_retry build --croot ${CONDA_BLD_DIR} conda/recipes/cugraph -c ci/artifacts/cugraph/cpu/.conda-bld/ --dirty --no-remove-work-dir --python=$PYTHON fi fi diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh index 0fca82216c3..50e4c25b90b 100644 --- a/ci/cpu/upload.sh +++ b/ci/cpu/upload.sh @@ -1,4 +1,5 @@ #!/bin/bash +# Copyright (c) 2018-2021, NVIDIA CORPORATION. 
# # Adopted from https://github.com/tmcdonell/travis-scripts/blob/dfaac280ac2082cd6bcaba3217428347899f2975/update-accelerate-buildbot.sh @@ -29,8 +30,8 @@ fi gpuci_logger "Get conda file output locations" -export LIBCUGRAPH_FILE=`conda build conda/recipes/libcugraph --output` -export CUGRAPH_FILE=`conda build conda/recipes/cugraph --python=$PYTHON --output` +export LIBCUGRAPH_FILE=`conda build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcugraph --output` +export CUGRAPH_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/cugraph --python=$PYTHON --output` ################################################################################ # UPLOAD - Conda packages diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 7242b4a11f5..30dc7373e15 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -16,6 +16,7 @@ function hasArg { export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} export CUDA_REL=${CUDA_VERSION%.*} +export CONDA_ARTIFACT_PATH=${WORKSPACE}/ci/artifacts/cugraph/cpu/.conda-bld/ function cleanup { gpuci_logger "Removing datasets and temp files" @@ -101,11 +102,11 @@ else chrpath -d libcugraph.so patchelf --replace-needed `patchelf --print-needed libcugraph.so | grep faiss` libfaiss.so libcugraph.so - CONDA_FILE=`find $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ -name "libcugraph*.tar.bz2"` + CONDA_FILE=`find ${CONDA_ARTIFACT_PATH} -name "libcugraph*.tar.bz2"` CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install echo "Installing $CONDA_FILE" - conda install -c $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ "$CONDA_FILE" + conda install -c ${CONDA_ARTIFACT_PATH} "$CONDA_FILE" echo "Build cugraph..." $WORKSPACE/build.sh cugraph diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index e714b61d774..1ef64ddbe72 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -10,7 +10,7 @@ package: version: {{ version }} source: - path: ../../.. + git_url: ../../.. build: number: {{ GIT_DESCRIBE_NUMBER }} From b85bd4788be6cca0f5e57410d222cbecf2786ccf Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Fri, 26 Mar 2021 15:28:18 +0100 Subject: [PATCH 30/51] TSP solver bug fix (#1480) Addressed comments from: https://github.com/rapidsai/cugraph/issues/1450 In addition: 1. Made raft handle const. 2. Split algorithm into multiple kernel calls and updated timers. 3. Removed global symbols to retrieve results in struct. Authors: - Hugo Linsenmaier (@hlinsen) Approvers: - Brad Rees (@BradReesWork) - Andrei Schaffer (@aschaffer) - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1480 --- cpp/include/algorithms.hpp | 2 +- cpp/src/traversal/tsp.cu | 202 +++++++++++++++++-------------- cpp/src/traversal/tsp.hpp | 57 +++++++-- cpp/src/traversal/tsp_solver.hpp | 147 +++++++++------------- cpp/src/traversal/tsp_utils.hpp | 25 +--- cpp/tests/traversal/tsp_test.cu | 2 +- 6 files changed, 216 insertions(+), 219 deletions(-) diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index c3a4f3ec985..8a5474b389c 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -218,7 +218,7 @@ void force_atlas2(GraphCOOView &graph, * @param[out] route Device array containing the returned route. 
* */ -float traveling_salesperson(raft::handle_t &handle, +float traveling_salesperson(raft::handle_t const &handle, int const *vtx_ptr, float const *x_pos, float const *y_pos, diff --git a/cpp/src/traversal/tsp.cu b/cpp/src/traversal/tsp.cu index c669246bc49..a28ddbbaa3f 100644 --- a/cpp/src/traversal/tsp.cu +++ b/cpp/src/traversal/tsp.cu @@ -17,13 +17,15 @@ #include #include +#include + #include "tsp.hpp" #include "tsp_solver.hpp" namespace cugraph { namespace detail { -TSP::TSP(raft::handle_t &handle, +TSP::TSP(raft::handle_t const &handle, int const *vtx_ptr, float const *x_pos, float const *y_pos, @@ -50,56 +52,77 @@ TSP::TSP(raft::handle_t &handle, max_threads_(handle_.get_device_properties().maxThreadsPerBlock), warp_size_(handle_.get_device_properties().warpSize), sm_count_(handle_.get_device_properties().multiProcessorCount), - restart_batch_(4096) + restart_batch_(8192), + neighbors_vec_((k_ + 1) * nodes_, stream_), + work_vec_(restart_batch_ * ((4 * nodes_ + 3 + warp_size_ - 1) / warp_size_ * warp_size_), + stream_), + best_x_pos_vec_(1, stream_), + best_y_pos_vec_(1, stream_), + best_route_vec_(1, stream_) { - allocate(); + setup(); } -void TSP::allocate() +void TSP::setup() { - // Scalars - mylock_ = mylock_scalar_.data(); - best_tour_ = best_tour_scalar_.data(); - climbs_ = climbs_scalar_.data(); + mylock_ = mylock_scalar_.data(); - // Vectors - neighbors_vec_.resize((k_ + 1) * nodes_); + neighbors_ = neighbors_vec_.data(); // pre-allocate workspace for climbs, each block needs a separate permutation space and search // buffer. We allocate a work buffer that will store the computed distances, px, py and the route. // We align it on the warp size. - work_vec_.resize(sizeof(float) * restart_batch_ * - ((4 * nodes_ + 3 + warp_size_ - 1) / warp_size_ * warp_size_)); + work_ = work_vec_.data(); + + results_.best_x_pos = best_x_pos_vec_.data(); + results_.best_y_pos = best_y_pos_vec_.data(); + results_.best_route = best_route_vec_.data(); + results_.best_cost = best_cost_scalar_.data(); +} - // Pointers - neighbors_ = neighbors_vec_.data().get(); - work_ = work_vec_.data().get(); +void TSP::reset_batch() +{ + mylock_scalar_.set_value_zero(stream_); + auto const max{std::numeric_limits::max()}; + best_cost_scalar_.set_value(max, stream_); +} + +void TSP::get_initial_solution(int const batch) +{ + if (!beam_search_) { + random_init<<>>( + work_, x_pos_, y_pos_, vtx_ptr_, nstart_, nodes_, batch, restart_batch_); + CHECK_CUDA(stream_); + } else { + knn_init<<>>( + work_, x_pos_, y_pos_, vtx_ptr_, neighbors_, nstart_, nodes_, k_, batch, restart_batch_); + CHECK_CUDA(stream_); + } } float TSP::compute() { - float valid_coo_dist = 0.f; + float final_cost = 0.f; int num_restart_batches = (restarts_ + restart_batch_ - 1) / restart_batch_; int restart_resid = restarts_ - (num_restart_batches - 1) * restart_batch_; - int global_best = INT_MAX; - float *soln = nullptr; - int *route_sol = nullptr; + int global_best = std::numeric_limits::max(); int best = 0; + std::vector h_x_pos; std::vector h_y_pos; + std::vector h_route; h_x_pos.reserve(nodes_ + 1); h_y_pos.reserve(nodes_ + 1); - - // Stats - int n_timers = 3; - long total_climbs = 0; - std::vector h_times; - struct timeval starttime, endtime; - - // KNN call - knn(); + h_route.reserve(nodes_); + std::vector addr_best_x_pos(1); + std::vector addr_best_y_pos(1); + std::vector addr_best_route(1); + HighResTimer hr_timer; + auto create_timer = [&hr_timer, this](char const *name) { + return VerboseTimer(name, hr_timer, verbose_); + }; if 
(verbose_) { - std::cout << "Doing " << num_restart_batches - 1 << " batches of size " << restart_batch_ + std::cout << "Doing " << num_restart_batches << " batches of size " << restart_batch_ << ", with " << restart_resid << " tail\n"; std::cout << "configuration: " << nodes_ << " nodes, " << restarts_ << " restart\n"; std::cout << "optimizing graph with kswap = " << kswaps << "\n"; @@ -107,82 +130,75 @@ float TSP::compute() // Tell the cache how we want it to behave cudaFuncSetCacheConfig(search_solution, cudaFuncCachePreferEqual); + best_thread_num_ = best_thread_count(nodes_, max_threads_, sm_count_, warp_size_); - int threads = best_thread_count(nodes_, max_threads_, sm_count_, warp_size_); - if (verbose_) std::cout << "Calculated best thread number = " << threads << "\n"; + if (verbose_) std::cout << "Calculated best thread number = " << best_thread_num_ << "\n"; - rmm::device_vector times(n_timers * threads + n_timers); - h_times.reserve(n_timers * threads + n_timers); + if (beam_search_) { + auto timer = create_timer("knn"); + knn(); + } - gettimeofday(&starttime, NULL); - for (int b = 0; b < num_restart_batches; ++b) { - reset<<<1, 1, 0, stream_>>>(mylock_, best_tour_, climbs_); - CHECK_CUDA(stream_); + for (auto batch = 0; batch < num_restart_batches; ++batch) { + reset_batch(); + if (batch == num_restart_batches - 1) restart_batch_ = restart_resid; - if (b == num_restart_batches - 1) restart_batch_ = restart_resid; - - search_solution<<>>(mylock_, - best_tour_, - vtx_ptr_, - beam_search_, - k_, - nodes_, - neighbors_, - x_pos_, - y_pos_, - work_, - nstart_, - times.data().get(), - climbs_, - threads); + { + auto timer = create_timer("initial_sol"); + get_initial_solution(batch); + } - CHECK_CUDA(stream_); - cudaDeviceSynchronize(); + { + auto timer = create_timer("search_sol"); + search_solution<<>>( + results_, mylock_, vtx_ptr_, beam_search_, k_, nodes_, x_pos_, y_pos_, work_, nstart_); + CHECK_CUDA(stream_); + } + + { + auto timer = create_timer("optimal_tour"); + get_optimal_tour<<>>(results_, mylock_, work_, nodes_); + CHECK_CUDA(stream_); + } - CUDA_TRY(cudaMemcpy(&best, best_tour_, sizeof(int), cudaMemcpyDeviceToHost)); cudaDeviceSynchronize(); + best = best_cost_scalar_.value(stream_); + if (verbose_) std::cout << "Best reported by kernel = " << best << "\n"; if (best < global_best) { global_best = best; - CUDA_TRY(cudaMemcpyFromSymbol(&soln, best_soln, sizeof(void *))); - cudaDeviceSynchronize(); - CUDA_TRY(cudaMemcpyFromSymbol(&route_sol, best_route, sizeof(void *))); - cudaDeviceSynchronize(); + + raft::update_host(addr_best_x_pos.data(), results_.best_x_pos, 1, stream_); + raft::update_host(addr_best_y_pos.data(), results_.best_y_pos, 1, stream_); + raft::update_host(addr_best_route.data(), results_.best_route, 1, stream_); + CUDA_TRY(cudaStreamSynchronize(stream_)); + + raft::copy(h_x_pos.data(), addr_best_x_pos[0], nodes_ + 1, stream_); + raft::copy(h_y_pos.data(), addr_best_y_pos[0], nodes_ + 1, stream_); + raft::copy(h_route.data(), addr_best_route[0], nodes_, stream_); + raft::copy(route_, addr_best_route[0], nodes_, stream_); + CHECK_CUDA(stream_); } - total_climbs += climbs_scalar_.value(stream_); - } - gettimeofday(&endtime, NULL); - double runtime = - endtime.tv_sec + endtime.tv_usec / 1e6 - starttime.tv_sec - starttime.tv_usec / 1e6; - long long moves = 1LL * total_climbs * (nodes_ - 2) * (nodes_ - 1) / 2; - - raft::copy(route_, route_sol, nodes_, stream_); - - CUDA_TRY(cudaMemcpy(h_x_pos.data(), soln, sizeof(float) * (nodes_ + 1), cudaMemcpyDeviceToHost)); 
- cudaDeviceSynchronize(); - CUDA_TRY(cudaMemcpy( - h_y_pos.data(), soln + nodes_ + 1, sizeof(float) * (nodes_ + 1), cudaMemcpyDeviceToHost)); - cudaDeviceSynchronize(); - - for (int i = 0; i < nodes_; ++i) { - if (verbose_) { std::cout << h_x_pos[i] << " " << h_y_pos[i] << "\n"; } - valid_coo_dist += euclidean_dist(h_x_pos.data(), h_y_pos.data(), i, i + 1); } - CUDA_TRY(cudaMemcpy(h_times.data(), - times.data().get(), - sizeof(float) * n_timers * threads + n_timers, - cudaMemcpyDeviceToHost)); - cudaDeviceSynchronize(); + for (auto i = 0; i < nodes_; ++i) { + if (verbose_) { std::cout << h_route[i] << ": " << h_x_pos[i] << " " << h_y_pos[i] << "\n"; } + final_cost += euclidean_dist(h_x_pos.data(), h_y_pos.data(), i, i + 1); + } if (verbose_) { - std::cout << "Search runtime = " << runtime << ", " << moves * 1e-9 / runtime << " Gmoves/s\n"; + hr_timer.display(std::cout); std::cout << "Optimized tour length = " << global_best << "\n"; - print_times(h_times, n_timers, handle_.get_device(), threads); } - return valid_coo_dist; + return final_cost; } void TSP::knn() @@ -192,17 +208,17 @@ void TSP::knn() int dim = 2; bool row_major_order = false; - rmm::device_vector input(nodes_ * dim); - float *input_ptr = input.data().get(); + rmm::device_uvector input(nodes_ * dim, stream_); + float *input_ptr = input.data(); raft::copy(input_ptr, x_pos_, nodes_, stream_); raft::copy(input_ptr + nodes_, y_pos_, nodes_, stream_); - rmm::device_vector search_data(nodes_ * dim); - float *search_data_ptr = search_data.data().get(); + rmm::device_uvector search_data(nodes_ * dim, stream_); + float *search_data_ptr = search_data.data(); raft::copy(search_data_ptr, input_ptr, nodes_ * dim, stream_); - rmm::device_vector distances(nodes_ * (k_ + 1)); - float *distances_ptr = distances.data().get(); + rmm::device_uvector distances(nodes_ * (k_ + 1), stream_); + float *distances_ptr = distances.data(); std::vector input_vec; std::vector sizes_vec; @@ -226,7 +242,7 @@ void TSP::knn() } } // namespace detail -float traveling_salesperson(raft::handle_t &handle, +float traveling_salesperson(raft::handle_t const &handle, int const *vtx_ptr, float const *x_pos, float const *y_pos, diff --git a/cpp/src/traversal/tsp.hpp b/cpp/src/traversal/tsp.hpp index b065b779b96..1208f8c8790 100644 --- a/cpp/src/traversal/tsp.hpp +++ b/cpp/src/traversal/tsp.hpp @@ -16,18 +16,28 @@ #pragma once -#include -#include #include + +#include #include + +#include #include #include namespace cugraph { namespace detail { + +struct TSPResults { + float **best_x_pos; + float **best_y_pos; + int **best_route; + int *best_cost; +}; + class TSP { public: - TSP(raft::handle_t &handle, + TSP(raft::handle_t const &handle, int const *vtx_ptr, float const *x_pos, float const *y_pos, @@ -39,14 +49,16 @@ class TSP { bool verbose, int *route); - void allocate(); + void setup(); + void reset_batch(); + void get_initial_solution(int const batch); float compute(); void knn(); ~TSP(){}; private: // Config - raft::handle_t &handle_; + raft::handle_t const &handle_; cudaStream_t stream_; int max_blocks_; int max_threads_; @@ -54,6 +66,7 @@ class TSP { int sm_count_; // how large a grid we want to run, this is fixed int restart_batch_; + int best_thread_num_; // TSP int const *vtx_ptr_; @@ -69,20 +82,42 @@ class TSP { // Scalars rmm::device_scalar mylock_scalar_; - rmm::device_scalar best_tour_scalar_; - rmm::device_scalar climbs_scalar_; + rmm::device_scalar best_cost_scalar_; int *mylock_; - int *best_tour_; - int *climbs_; + int *best_cost_; // Vectors - 
rmm::device_vector neighbors_vec_; - rmm::device_vector work_vec_; + rmm::device_uvector neighbors_vec_; + rmm::device_uvector work_vec_; + rmm::device_uvector best_x_pos_vec_; + rmm::device_uvector best_y_pos_vec_; + rmm::device_uvector best_route_vec_; int64_t *neighbors_; int *work_; int *work_route_; + TSPResults results_; }; + +class VerboseTimer { + public: + VerboseTimer(char const *name, HighResTimer &hr_timer, bool verbose) + : name_(name), hr_timer_(hr_timer), verbose_(verbose) + { + if (verbose_) hr_timer_.start(name_); + } + + ~VerboseTimer() + { + if (verbose_) hr_timer_.stop(); + } + + private: + const char *name_; + HighResTimer &hr_timer_; + bool verbose_; +}; + } // namespace detail } // namespace cugraph diff --git a/cpp/src/traversal/tsp_solver.hpp b/cpp/src/traversal/tsp_solver.hpp index 20d826cac5c..c7b8cdaaf1d 100644 --- a/cpp/src/traversal/tsp_solver.hpp +++ b/cpp/src/traversal/tsp_solver.hpp @@ -29,29 +29,20 @@ namespace cugraph { namespace detail { -__device__ float *best_soln; -__device__ int *best_route; -extern __shared__ int shbuf[]; - -__global__ void reset(int *mylock, int *best_tour, int *climbs) -{ - *mylock = 0; - *best_tour = INT_MAX; - *climbs = 0; - best_soln = nullptr; - best_route = nullptr; -} - -// random permutation kernel -__device__ void random_init(float const *posx, +__global__ void random_init(int *work, + float const *posx, float const *posy, int const *vtx_ptr, - int *path, - float *px, - float *py, int const nstart, - int const nodes) + int const nodes, + int const batch, + int const restart_batch) { + int *buf = &work[blockIdx.x * ((4 * nodes + 3 + 31) / 32 * 32)]; + float *px = (float *)(&buf[nodes]); + float *py = &px[nodes + 1]; + int *path = (int *)(&py[nodes + 1]); + // Fill values for (int i = threadIdx.x; i <= nodes; i += blockDim.x) { px[i] = posx[i]; @@ -60,15 +51,15 @@ __device__ void random_init(float const *posx, } __syncthreads(); - - if (threadIdx.x == 0) { /* serial permutation as starting point */ + // serial permutation as starting point + if (threadIdx.x == 0) { // swap to start at nstart node raft::swapVals(px[0], px[nstart]); raft::swapVals(py[0], py[nstart]); raft::swapVals(path[0], path[nstart]); curandState rndstate; - curand_init(blockIdx.x, 0, 0, &rndstate); + curand_init(blockIdx.x + (restart_batch * batch), 0, 0, &rndstate); for (int i = 1; i < nodes; i++) { int j = curand(&rndstate) % (nodes - 1 - i) + i; if (i == j) continue; @@ -76,34 +67,37 @@ __device__ void random_init(float const *posx, raft::swapVals(py[i], py[j]); raft::swapVals(path[i], path[j]); } - px[nodes] = px[0]; /* close the loop now, avoid special cases later */ + // close the loop now, avoid special cases later + px[nodes] = px[0]; py[nodes] = py[0]; path[nodes] = path[0]; } } -// Use KNN as a starting solution -__device__ void knn_init(float const *posx, +__global__ void knn_init(int *work, + float const *posx, float const *posy, int const *vtx_ptr, int64_t const *neighbors, - int *buf, - int *path, - float *px, - float *py, int const nstart, int const nodes, - int const K) + int const K, + int const batch, + int const restart_batch) { + int *buf = &work[blockIdx.x * ((4 * nodes + 3 + 31) / 32 * 32)]; + float *px = (float *)(&buf[nodes]); + float *py = &px[nodes + 1]; + int *path = (int *)(&py[nodes + 1]); + for (int i = threadIdx.x; i < nodes; i += blockDim.x) buf[i] = 0; __syncthreads(); if (threadIdx.x == 0) { curandState rndstate; - curand_init(blockIdx.x, 0, 0, &rndstate); + curand_init(blockIdx.x + (restart_batch * batch), 0, 0, 
&rndstate); int progress = 0; - int initlen = 0; px[0] = posx[nstart]; py[0] = posy[nstart]; @@ -114,7 +108,6 @@ __device__ void knn_init(float const *posx, while (progress < nodes - 1) { // beam search as starting point for (int i = 1; i <= progress; i++) buf[i] = 0; progress = 0; // reset current location in path and visited array - initlen = 0; int randjumps = 0; while (progress < nodes - 1) { int nj = curand(&rndstate) % K; @@ -146,13 +139,11 @@ __device__ void knn_init(float const *posx, px[progress] = posx[head]; py[progress] = posy[head]; path[progress] = vtx_ptr[head]; - initlen += __float2int_rn(euclidean_dist(px, py, progress, progress - 1)); } } px[nodes] = px[nstart]; py[nodes] = py[nstart]; path[nodes] = path[nstart]; - initlen += __float2int_rn(euclidean_dist(px, py, nodes, nstart)); } } @@ -211,10 +202,23 @@ __device__ void two_opt_search( } } -// This function being runned for each block -__device__ void hill_climbing( - float *px, float *py, int *buf, int *path, int *shbuf, int const nodes, int *climbs) +__global__ __launch_bounds__(2048, 2) void search_solution(TSPResults results, + int *mylock, + int const *vtx_ptr, + bool beam_search, + int const K, + int nodes, + float const *posx, + float const *posy, + int *work, + int const nstart) { + int *buf = &work[blockIdx.x * ((4 * nodes + 3 + 31) / 32 * 32)]; + float *px = (float *)(&buf[nodes]); + float *py = &px[nodes + 1]; + int *path = (int *)(&py[nodes + 1]); + + __shared__ int shbuf[tilesize]; __shared__ int best_change[kswaps]; __shared__ int best_i[kswaps]; __shared__ int best_j[kswaps]; @@ -249,9 +253,6 @@ __device__ void hill_climbing( two_opt_search(buf, px, py, shbuf, &minchange, &mini, &minj, nodes); __syncthreads(); - // Stats only - if (threadIdx.x == 0) atomicAdd(climbs, 1); - shbuf[threadIdx.x] = minchange; int j = blockDim.x; // warp reduction to find best thread results @@ -338,77 +339,45 @@ __device__ void hill_climbing( } while (minchange < 0 && myswaps < 2 * nodes); } -__device__ void get_optimal_tour( - int *mylock, int *best_tour, float *px, float *py, int *path, int *shbuf, int const nodes) +__global__ void get_optimal_tour(TSPResults results, int *mylock, int *work, int const nodes) { + extern __shared__ int accumulator[]; + int climber_id = blockIdx.x; + int *buf = &work[climber_id * ((4 * nodes + 3 + 31) / 32 * 32)]; + float *px = (float *)(&buf[nodes]); + float *py = &px[nodes + 1]; + int *path = (int *)(&py[nodes + 1]); + // Now find actual length of the last tour, result of the climb int term = 0; for (int i = threadIdx.x; i < nodes; i += blockDim.x) { term += __float2int_rn(euclidean_dist(px, py, i, i + 1)); } - shbuf[threadIdx.x] = term; + accumulator[threadIdx.x] = term; __syncthreads(); int j = blockDim.x; // block level reduction do { int k = (j + 1) / 2; - if ((threadIdx.x + k) < j) { shbuf[threadIdx.x] += shbuf[threadIdx.x + k]; } + if ((threadIdx.x + k) < j) { accumulator[threadIdx.x] += accumulator[threadIdx.x + k]; } j = k; // divide active warp size in half __syncthreads(); } while (j > 1); - term = shbuf[0]; + term = accumulator[0]; if (threadIdx.x == 0) { - atomicMin(best_tour, term); + atomicMin(results.best_cost, term); while (atomicExch(mylock, 1) != 0) ; // acquire - if (best_tour[0] == term) { - best_soln = px; - best_route = path; + if (results.best_cost[0] == term) { + results.best_x_pos[0] = px; + results.best_y_pos[0] = py; + results.best_route[0] = path; } *mylock = 0; // release __threadfence(); } } -__global__ __launch_bounds__(2048, 2) void search_solution(int *mylock, 
- int *best_tour, - int const *vtx_ptr, - bool beam_search, - int const K, - int nodes, - int64_t const *neighbors, - float const *posx, - float const *posy, - int *work, - int const nstart, - float *times, - int *climbs, - int threads) -{ - int *buf = &work[blockIdx.x * ((4 * nodes + 3 + 31) / 32 * 32)]; - float *px = (float *)(&buf[nodes]); - float *py = &px[nodes + 1]; - int *path = (int *)(&py[nodes + 1]); - __shared__ int shbuf[tilesize]; - clock_t start; - - start = clock64(); - if (!beam_search) - random_init(posx, posy, vtx_ptr, path, px, py, nstart, nodes); - else - knn_init(posx, posy, vtx_ptr, neighbors, buf, path, px, py, nstart, nodes, K); - __syncthreads(); - times[threadIdx.x] = clock64() - start; - - start = clock64(); - hill_climbing(px, py, buf, path, shbuf, nodes, climbs); - __syncthreads(); - times[threads + threadIdx.x + 1] = clock64() - start; - - start = clock64(); - get_optimal_tour(mylock, best_tour, px, py, path, shbuf, nodes); - times[2 * threads + threadIdx.x + 1] = clock64() - start; -} } // namespace detail } // namespace cugraph diff --git a/cpp/src/traversal/tsp_utils.hpp b/cpp/src/traversal/tsp_utils.hpp index 3faa2efea3b..2a3445f1c81 100644 --- a/cpp/src/traversal/tsp_utils.hpp +++ b/cpp/src/traversal/tsp_utils.hpp @@ -26,34 +26,11 @@ namespace cugraph { namespace detail { -__host__ __device__ inline float euclidean_dist(float *px, float *py, int a, int b) +constexpr float euclidean_dist(float *px, float *py, int a, int b) { return sqrtf((px[a] - px[b]) * (px[a] - px[b]) + (py[a] - py[b]) * (py[a] - py[b])); } -static std::vector device_func = {"Find First", "Hill Climbing", "Retrieve Path"}; - -void print_times(std::vector &h_times, int const n_timers, int device, int threads) -{ - int clock_rate; - cudaDeviceGetAttribute(&clock_rate, cudaDevAttrClockRate, device); - - double total = 0; - h_times[0] /= (float)clock_rate; - total += h_times[0]; - for (int i = 1; i < n_timers; ++i) { - h_times[i * threads + 1] /= (float)clock_rate; - total += h_times[i * threads + 1]; - } - std::cout << "Stats: \n"; - std::cout << device_func[0] << " time: " << h_times[0] * 1e-3 << " " - << (h_times[0] / total) * 100.0 << "%\n"; - for (int i = 1; i < n_timers; ++i) { - std::cout << device_func[i] << " time: " << h_times[i * threads + 1] * 1e-3 << " " - << (h_times[i * threads + 1] / total) * 100.0 << "%\n"; - } -} - // Get maximum number of threads we can run on based on number of nodes, // shared memory usage, max threads per block and SM, max blocks for SM and registers per SM. int best_thread_count(int nodes, int max_threads, int sm_count, int warp_size) diff --git a/cpp/tests/traversal/tsp_test.cu b/cpp/tests/traversal/tsp_test.cu index 383427a56cf..9ebf464ae3e 100644 --- a/cpp/tests/traversal/tsp_test.cu +++ b/cpp/tests/traversal/tsp_test.cu @@ -132,7 +132,7 @@ class Tests_Tsp : public ::testing::TestWithParam { int nodes = load_tsp(param.tsp_file.c_str(), &input); // Device alloc - raft::handle_t handle; + raft::handle_t const handle; rmm::device_uvector vertices(static_cast(nodes), nullptr); rmm::device_uvector route(static_cast(nodes), nullptr); rmm::device_uvector x_pos(static_cast(nodes), nullptr); From 1f0f14eba2e6253423b1a58ca38989261308df6c Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Fri, 26 Mar 2021 09:31:29 -0500 Subject: [PATCH 31/51] MS BFS python APIs + EgoNet updates (#1469) There are various things in this PR. 
Multi-Seed (MS) BFS: - API tentative - Saving research on memory feasability helper function (not in production) - Saving research on running the current BFS concurrently with streams and threads for analysis perf comparison (not in production) EgoNet: - Multithreading in EgoNet which deserializes execution and comes with mild performance improvements on large sizes - Some cleanup Authors: - Alex Fender (@afender) Approvers: - Chuck Hastings (@ChuckHastings) - @Iroy30 - Brad Rees (@BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1469 --- cpp/src/community/egonet.cu | 19 +- cpp/tests/CMakeLists.txt | 16 ++ cpp/tests/community/egonet_test.cu | 283 +++++++++++------------ cpp/tests/experimental/ms_bfs_test.cpp | 301 +++++++++++++++++++++++++ cpp/tests/experimental/streams.cu | 44 ++++ python/cugraph/__init__.py | 4 +- python/cugraph/tests/test_egonet.py | 32 +-- python/cugraph/traversal/__init__.py | 4 +- python/cugraph/traversal/ms_bfs.py | 282 +++++++++++++++++++++++ python/cugraph/utilities/utils.py | 106 +++++---- 10 files changed, 868 insertions(+), 223 deletions(-) create mode 100644 cpp/tests/experimental/ms_bfs_test.cpp create mode 100644 cpp/tests/experimental/streams.cu create mode 100644 python/cugraph/traversal/ms_bfs.py diff --git a/cpp/src/community/egonet.cu b/cpp/src/community/egonet.cu index 067d27f9a92..336a5c939b8 100644 --- a/cpp/src/community/egonet.cu +++ b/cpp/src/community/egonet.cu @@ -79,7 +79,12 @@ extract( // Streams will allocate concurrently later std::vector> reached{}; - reached.reserve(handle.get_num_internal_streams()); + reached.reserve(n_subgraphs); + for (vertex_t i = 0; i < n_subgraphs; i++) { + // Allocations and operations are attached to the worker stream + rmm::device_uvector local_reach(v, handle.get_internal_stream_view(i)); + reached.push_back(std::move(local_reach)); + } // h_source_vertex[i] is used by other streams in the for loop user_stream_view.synchronize(); @@ -87,15 +92,13 @@ extract( HighResTimer hr_timer; hr_timer.start("ego_neighbors"); #endif + +#pragma omp parallel for for (vertex_t i = 0; i < n_subgraphs; i++) { // get light handle from worker pool raft::handle_t light_handle(handle, i); auto worker_stream_view = light_handle.get_stream_view(); - // Allocations and operations are attached to the worker stream - rmm::device_uvector local_reach(v, worker_stream_view); - reached.push_back(std::move(local_reach)); - // BFS with cutoff // consider adding a device API to BFS (ie. 
accept source on the device) rmm::device_uvector predecessors(v, worker_stream_view); // not used @@ -149,10 +152,10 @@ extract( neighbors.resize(h_neighbors_offsets[n_subgraphs]); user_stream_view.synchronize(); - // Construct the neighboors list concurrently +// Construct the neighboors list concurrently +#pragma omp parallel for for (vertex_t i = 0; i < n_subgraphs; i++) { - raft::handle_t light_handle(handle, i); - auto worker_stream_view = light_handle.get_stream_view(); + auto worker_stream_view = handle.get_internal_stream_view(i); thrust::copy(rmm::exec_policy(worker_stream_view), reached[i].begin(), reached[i].end(), diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 5571cf5f124..1dc4a5d3eaa 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -318,6 +318,13 @@ set(MST_TEST_SRC ConfigureTest(MST_TEST "${MST_TEST_SRC}") +################################################################################################### +# - Experimental stream tests ----------------------------------------------------- + +set(EXPERIMENTAL_STREAM_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/streams.cu") + +ConfigureTest(EXPERIMENTAL_STREAM "${EXPERIMENTAL_STREAM_SRCS}" "") ################################################################################################### # - Experimental R-mat graph generation tests ----------------------------------------------------- @@ -375,6 +382,15 @@ set(EXPERIMENTAL_BFS_TEST_SRCS ConfigureTest(EXPERIMENTAL_BFS_TEST "${EXPERIMENTAL_BFS_TEST_SRCS}") +################################################################################################### +# - Experimental BFS tests ------------------------------------------------------------------------ + +set(EXPERIMENTAL_MSBFS_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/ms_bfs_test.cpp") + +ConfigureTest(EXPERIMENTAL_MSBFS_TEST "${EXPERIMENTAL_MSBFS_TEST_SRCS}") + + ################################################################################################### # - Experimental SSSP tests ----------------------------------------------------------------------- diff --git a/cpp/tests/community/egonet_test.cu b/cpp/tests/community/egonet_test.cu index a9224b42bc1..e7fea43be42 100644 --- a/cpp/tests/community/egonet_test.cu +++ b/cpp/tests/community/egonet_test.cu @@ -182,150 +182,141 @@ INSTANTIATE_TEST_CASE_P( // For perf analysis /* INSTANTIATE_TEST_CASE_P( -simple_test, -Tests_InducedEgo, -::testing::Values( -InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 1, false), -InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 2, false), -InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 3, false), -InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 4, false), -InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 5, false), -InducedEgo_Usecase( -"test/datasets/soc-LiveJournal1.mtx", std::vector{363617}, 2, false), -InducedEgo_Usecase( -"test/datasets/soc-LiveJournal1.mtx", -std::vector{ - 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755}, - 2, - false), - InducedEgo_Usecase( - "test/datasets/soc-LiveJournal1.mtx", - std::vector{ - 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755, - 2536834, 3070144, 3888415, 3131712, 2382526, 1040771, 2631543, 4607218, 4465829, - 3341686, 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899, - 1213033, 4840102, 4529036, 3421116, 4264831, 
4089751, 4272322, 3486998, 2830318, - 320953, 2388331, 520808, 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680, - 847662, 3277365, 3957318, 3455123, 2454259, 670953, 4465677, 1027332, 2560721, 89061, - 1163406, 3109528, 3221856, 4714426, 2382774, 37828, 4433616, 3283229, 591911, - 4200188, 442522, 872207, 2437601, 741003, 266241, 914618, 3626195, 2021080, - 4679624, 777476, 2527796, 1114017, 640142, 49259, 4069879, 3869098, 1105040, - 4707804, 3208582, 3325885, 1450601, 4072548, 2037062, 2029646, 4575891, 1488598, 79105, - 4827273, 3795434, 4647518, 4733397, 3980718, 1184627}, - 2, - false), - InducedEgo_Usecase( - "test/datasets/soc-LiveJournal1.mtx", - std::vector{ - 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755, - 2536834, 3070144, 3888415, 3131712, 2382526, 1040771, 2631543, 4607218, 4465829, - 3341686, 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899, - 1213033, 4840102, 4529036, 3421116, 4264831, 4089751, 4272322, 3486998, 2830318, - 320953, 2388331, 520808, 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680, - 847662, 3277365, 3957318, 3455123, 2454259, 670953, 4465677, 1027332, 2560721, 89061, - 1163406, 3109528, 3221856, 4714426, 2382774, 37828, 4433616, 3283229, 591911, - 4200188, 442522, 872207, 2437601, 741003, 266241, 914618, 3626195, 2021080, - 4679624, 777476, 2527796, 1114017, 640142, 49259, 4069879, 3869098, 1105040, - 4707804, 3208582, 3325885, 1450601, 4072548, 2037062, 2029646, 4575891, 1488598, 79105, - 4827273, 3795434, 4647518, 4733397, 3980718, 1184627, 984983, 3114832, 1967741, - 1599818, 144593, 2698770, 2889449, 2495550, 1053813, 1193622, 686026, 3989015, - 2040719, 4693428, 3190376, 2926728, 3399030, 1664419, 662429, 4526841, 2186957, - 3752558, 2440046, 2930226, 3633006, 4058166, 3137060, 3499296, 2126343, 148971, - 2199672, 275811, 2813976, 2274536, 1189239, 1335942, 2465624, 2596042, 829684, 193400, - 2682845, 3691697, 4022437, 4051170, 4195175, 2876420, 3984220, 2174475, 326134, - 2606530, 2493046, 4706121, 1498980, 4576225, 1271339, 44832, 1875673, 4664940, - 134931, 736397, 4333554, 2751031, 2163610, 2879676, 3174153, 3317403, 2052464, - 1881883, 4757859, 3596257, 2358088, 2578758, 447504, 590720, 1717038, 1869795, - 1133885, 3027521, 840312, 2818881, 3654321, 2730947, 353585, 1134903, 2223378, - 1508824, 3662521, 1363776, 2712071, 288441, 1204581, 3502242, 4645567, 2767267, - 1514366, 3956099, 1422145, 1216608, 2253360, 189132, 4238225, 1345783, 451571, 1599442, - 3237284, 4711405, 929446, 1857675, 150759, 1277633, 761210, 138628, 1026833, - 2599544, 2464737, 989203, 3399615, 2144292, 216142, 637312, 2044964, 716256, 1660632, - 1762919, 4784357, 2213415, 2764769, 291806, 609772, 3264819, 1870953, 1516385, - 235647, 1045474, 2664957, 819095, 1824119, 4045271, 4448109, 1676788, 4285177, - 1580502, 3546548, 2771971, 3927086, 1339779, 3156204, 1730998, 1172522, 2433024, - 4533449, 479930, 2010695, 672994, 3542039, 3176455, 26352, 2137735, 866910, - 4410835, 2623982, 3603159, 2555625, 2765653, 267865, 2015523, 1009052, 4713994, - 1600667, 2176195, 3179631, 4570390, 2018424, 3356384, 1784287, 894861, 3622099, - 1647273, 3044136, 950354, 1491760, 3416929, 3757300, 2244912, 4129215, 1600848, - 3867343, 72329, 919189, 992521, 3445975, 4712557, 4680974, 188419, 2612093, - 1991268, 3566207, 2281468, 3859078, 2492806, 3398628, 763441, 2679107, 2554420, - 2130132, 4664374, 1182901, 3890770, 4714667, 4209303, 4013060, 3617653, 2040022, - 3296519, 4190671, 1693353, 2678411, 3788834, 
2781815, 191965, 1083926, 503974, 3529226, - 1650522, 1900976, 542080, 3423929, 3418905, 878165, 4701703, 3022790, 4316365, 76365, - 4053672, 1358185, 3830478, 4445661, 3210024, 1895915, 4541133, 2938808, 562788, - 3920065, 1458776, 4052046, 2967475, 1092809, 3203538, 159626, 3399464, 214467, - 3343982, 1811854, 3189045, 4272117, 4701563, 424807, 4341116, 760545, 4674683, - 1538018, 386762, 194237, 2162719, 1694433, 943728, 2389036, 2196653, 3085571, - 1513424, 3689413, 3278747, 4197291, 3324063, 3651090, 1737936, 2768803, 2768889, - 3108096, 4311775, 3569480, 886705, 733256, 2477493, 1735412, 2960895, 1983781, - 1861797, 3566460, 4537673, 1164093, 3499764, 4553071, 3518985, 847658, 918948, - 2922351, 1056144, 652895, 1013195, 780505, 1702928, 3562838, 1432719, 2405207, - 1054920, 641647, 2240939, 3617702, 383165, 652641, 879593, 1810739, 2096385, - 4497865, 4768530, 1743968, 3582014, 1025009, 3002122, 2422190, 527647, 1251821, - 2571153, 4095874, 3705333, 3637407, 1385567, 4043855, 4041930, 2433139, 1710383, - 1127734, 4362316, 711588, 817839, 3214775, 910077, 1313768, 2382229, 16864, 2081770, - 3095420, 3195272, 548711, 2259860, 1167323, 2435974, 425238, 2085179, 2630042, - 2632881, 2867923, 3703565, 1037695, 226617, 4379130, 1541468, 3581937, 605965, - 1137674, 4655221, 4769963, 1394370, 4425315, 2990132, 2364485, 1561137, 2713384, - 481509, 2900382, 934766, 2986774, 1767669, 298593, 2502539, 139296, 3794229, - 4002180, 4718138, 2909238, 423691, 3023810, 2784924, 2760160, 1971980, 316683, - 3828090, 3253691, 4839313, 1203624, 584938, 3901482, 1747543, 1572737, 3533226, - 774708, 1691195, 1037110, 1557763, 225120, 4424243, 3524086, 1717663, 4332507, - 3513592, 4274932, 1232118, 873498, 1416042, 2488925, 111391, 4704545, 4492545, - 445317, 1584812, 2187737, 2471948, 3731678, 219255, 2282627, 2589971, 2372185, - 4609096, 3673961, 2524410, 12823, 2437155, 3015974, 4188352, 3184084, 3690756, - 1222341, 1278376, 3652030, 4162647, 326548, 3930062, 3926100, 1551222, 2722165, - 4526695, 3997534, 4815513, 3139056, 2547644, 3028915, 4149092, 3656554, 2691582, - 2676699, 1878842, 260174, 3129900, 4379993, 182347, 2189338, 3783616, 2616666, - 2596952, 243007, 4179282, 2730, 1939894, 2332032, 3335636, 182332, 3112260, - 2174584, 587481, 4527368, 3154106, 3403059, 673206, 2150292, 446521, 1600204, - 4819428, 2591357, 48490, 2917012, 2285923, 1072926, 2824281, 4364250, 956033, 311938, - 37251, 3729300, 2726300, 644966, 1623020, 1419070, 4646747, 2417222, 2680238, - 2561083, 1793801, 2349366, 339747, 611366, 4684147, 4356907, 1277161, 4510381, - 3218352, 4161658, 3200733, 1172372, 3997786, 3169266, 3353418, 2248955, 2875885, - 2365369, 498208, 2968066, 2681505, 2059048, 2097106, 3607540, 1121504, 2016789, - 1762605, 3138431, 866081, 3705757, 3833066, 2599788, 760816, 4046672, 1544367, - 2983906, 4842911, 209599, 1250954, 3333704, 561212, 4674336, 2831841, 3690724, - 2929360, 4830834, 1177524, 2487687, 3525137, 875283, 651241, 2110742, 1296646, - 1543739, 4349417, 2384725, 1931751, 1519208, 1520034, 3385008, 3219962, 734912, 170230, - 1741419, 729913, 2860117, 2362381, 1199807, 2424230, 177824, 125948, 2722701, - 4687548, 1140771, 3232742, 4522020, 4376360, 1125603, 590312, 2481884, 138951, - 4086775, 615155, 3395781, 4587272, 283209, 568470, 4296185, 4344150, 2454321, - 2672602, 838828, 4051647, 1709120, 3074610, 693235, 4356087, 3018806, 239410, - 2431497, 691186, 766276, 4462126, 859155, 2370304, 1571808, 1938673, 1694955, - 3871296, 4245059, 3987376, 301524, 2512461, 3410437, 3300380, 684922, 
4581995, - 3599557, 683515, 1850634, 3704678, 1937490, 2035591, 3718533, 2065879, 3160765, - 1467884, 1912241, 2501509, 3668572, 3390469, 2501150, 612319, 713633, 1976262, 135946, - 3641535, 632083, 13414, 4217765, 4137712, 2550250, 3281035, 4179598, 961045, - 2020694, 4380006, 1345936, 289162, 1359035, 770872, 4509911, 3947317, 4719693, - 248568, 2625660, 1237232, 2153208, 4814282, 1259954, 3677369, 861222, 2883506, - 3339149, 3998335, 491017, 1609022, 2648112, 742132, 649609, 4206953, 3131106, - 3504814, 3344486, 611721, 3215620, 2856233, 4447505, 1949222, 1868345, 712710, 6966, - 4730666, 3181872, 2972889, 3038521, 3525444, 4385208, 1845613, 1124187, 2030476, - 4468651, 2478792, 3473580, 3783357, 1852991, 1648485, 871319, 1670723, 4458328, - 3218600, 1811100, 3443356, 2233873, 3035207, 2548692, 3337891, 3773674, 1552957, - 4782811, 3144712, 3523466, 1491315, 3955852, 1838410, 3164028, 1092543, 776459, - 2959379, 2541744, 4064418, 3908320, 2854145, 3960709, 1348188, 977678, 853619, - 1304291, 2848702, 1657913, 1319826, 3322665, 788037, 2913686, 4471279, 1766285, 348304, - 56570, 1892118, 4017244, 401006, 3524539, 4310134, 1624693, 4081113, 957511, 849400, - 129975, 2616130, 378537, 1556787, 3916162, 1039980, 4407778, 2027690, 4213675, - 839863, 683134, 75805, 2493150, 4215796, 81587, 751845, 1255588, 1947964, - 1950470, 859401, 3077088, 3931110, 2316256, 1523761, 4527477, 4237511, 1123513, - 4209796, 3584772, 4250563, 2091754, 1618766, 2139944, 4525352, 382159, 2955887, 41760, - 2313998, 496912, 3791570, 3904792, 3613654, 873959, 127076, 2537797, 2458107, - 4543265, 3661909, 26828, 271816, 17854, 2461269, 1776042, 1573899, 3409957, - 4335712, 4534313, 3392751, 1230124, 2159031, 4444015, 3373087, 3848014, 2026600, - 1382747, 3537242, 4536743, 4714155, 3788371, 3570849, 173741, 211962, 4377778, - 119369, 2856973, 2945854, 1508054, 4503932, 3141566, 1842177, 3448683, 3384614, - 2886508, 1573965, 990618, 3053734, 2918742, 4508753, 1032149, 60943, 4291620, - 722607, 2883224, 169359, 4356585, 3725543, 3678729, 341673, 3592828, 4077251, - 3382936, 3885685, 4630994, 1286698, 4449616, 1138430, 3113385, 4660578, 2539973, - 4562286, 4085089, 494737, 3967610, 2130702, 1823755, 1369324, 3796951, 956299, 141730, - 935144, 4381893, 4412545, 1382250, 3024476, 2364546, 3396164, 3573511, 314081, 577688, - 4154135, 1567018, 4047761, 2446220, 1148833, 4842497, 3967186, 1175290, 3749667, - 1209593, 3295627, 3169065, 2460328, 1838486, 1436923, 2843887, 3676426, 2079145, - 2975635, 535071, 4287509, 3281107, 39606, 3115500, 3204573, 722131, 3124073}, -2, -false)));*/ + simple_test, + Tests_InducedEgo, + ::testing::Values( + InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 1, false), + InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 2, false), + InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 3, false), + InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 4, false), + InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{0}, 5, false), + InducedEgo_Usecase( + "test/datasets/soc-LiveJournal1.mtx", std::vector{363617}, 2, false), + InducedEgo_Usecase( + "test/datasets/soc-LiveJournal1.mtx", + std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755}, + 2, + false), + InducedEgo_Usecase( + "test/datasets/soc-LiveJournal1.mtx", + std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755, + 2536834, 3070144, 3888415, 3131712, 
2382526, 1040771, 2631543, 4607218, 4465829, 3341686, + 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899, 1213033, 4840102, + 4529036, 3421116, 4264831, 4089751, 4272322, 3486998, 2830318, 320953, 2388331, 520808, + 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680, 847662, 3277365, 3957318, + 3455123, 2454259, 670953, 4465677, 1027332, 2560721, 89061, 1163406, 3109528, 3221856, + 4714426, 2382774, 37828, 4433616, 3283229, 591911, 4200188, 442522, 872207, 2437601, + 741003, 266241, 914618, 3626195, 2021080, 4679624, 777476, 2527796, 1114017, 640142, + 49259, 4069879, 3869098, 1105040, 4707804, 3208582, 3325885, 1450601, 4072548, 2037062, + 2029646, 4575891, 1488598, 79105, 4827273, 3795434, 4647518, 4733397, 3980718, 1184627}, + 2, + false), + InducedEgo_Usecase( + "test/datasets/soc-LiveJournal1.mtx", + std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755, + 2536834, 3070144, 3888415, 3131712, 2382526, 1040771, 2631543, 4607218, 4465829, 3341686, + 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899, 1213033, 4840102, + 4529036, 3421116, 4264831, 4089751, 4272322, 3486998, 2830318, 320953, 2388331, 520808, + 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680, 847662, 3277365, 3957318, + 3455123, 2454259, 670953, 4465677, 1027332, 2560721, 89061, 1163406, 3109528, 3221856, + 4714426, 2382774, 37828, 4433616, 3283229, 591911, 4200188, 442522, 872207, 2437601, + 741003, 266241, 914618, 3626195, 2021080, 4679624, 777476, 2527796, 1114017, 640142, + 49259, 4069879, 3869098, 1105040, 4707804, 3208582, 3325885, 1450601, 4072548, 2037062, + 2029646, 4575891, 1488598, 79105, 4827273, 3795434, 4647518, 4733397, 3980718, 1184627, + 984983, 3114832, 1967741, 1599818, 144593, 2698770, 2889449, 2495550, 1053813, 1193622, + 686026, 3989015, 2040719, 4693428, 3190376, 2926728, 3399030, 1664419, 662429, 4526841, + 2186957, 3752558, 2440046, 2930226, 3633006, 4058166, 3137060, 3499296, 2126343, 148971, + 2199672, 275811, 2813976, 2274536, 1189239, 1335942, 2465624, 2596042, 829684, 193400, + 2682845, 3691697, 4022437, 4051170, 4195175, 2876420, 3984220, 2174475, 326134, 2606530, + 2493046, 4706121, 1498980, 4576225, 1271339, 44832, 1875673, 4664940, 134931, 736397, + 4333554, 2751031, 2163610, 2879676, 3174153, 3317403, 2052464, 1881883, 4757859, 3596257, + 2358088, 2578758, 447504, 590720, 1717038, 1869795, 1133885, 3027521, 840312, 2818881, + 3654321, 2730947, 353585, 1134903, 2223378, 1508824, 3662521, 1363776, 2712071, 288441, + 1204581, 3502242, 4645567, 2767267, 1514366, 3956099, 1422145, 1216608, 2253360, 189132, + 4238225, 1345783, 451571, 1599442, 3237284, 4711405, 929446, 1857675, 150759, 1277633, + 761210, 138628, 1026833, 2599544, 2464737, 989203, 3399615, 2144292, 216142, 637312, + 2044964, 716256, 1660632, 1762919, 4784357, 2213415, 2764769, 291806, 609772, 3264819, + 1870953, 1516385, 235647, 1045474, 2664957, 819095, 1824119, 4045271, 4448109, 1676788, + 4285177, 1580502, 3546548, 2771971, 3927086, 1339779, 3156204, 1730998, 1172522, 2433024, + 4533449, 479930, 2010695, 672994, 3542039, 3176455, 26352, 2137735, 866910, 4410835, + 2623982, 3603159, 2555625, 2765653, 267865, 2015523, 1009052, 4713994, 1600667, 2176195, + 3179631, 4570390, 2018424, 3356384, 1784287, 894861, 3622099, 1647273, 3044136, 950354, + 1491760, 3416929, 3757300, 2244912, 4129215, 1600848, 3867343, 72329, 919189, 992521, + 3445975, 4712557, 4680974, 188419, 2612093, 1991268, 3566207, 2281468, 3859078, 2492806, + 
3398628, 763441, 2679107, 2554420, 2130132, 4664374, 1182901, 3890770, 4714667, 4209303, + 4013060, 3617653, 2040022, 3296519, 4190671, 1693353, 2678411, 3788834, 2781815, 191965, + 1083926, 503974, 3529226, 1650522, 1900976, 542080, 3423929, 3418905, 878165, 4701703, + 3022790, 4316365, 76365, 4053672, 1358185, 3830478, 4445661, 3210024, 1895915, 4541133, + 2938808, 562788, 3920065, 1458776, 4052046, 2967475, 1092809, 3203538, 159626, 3399464, + 214467, 3343982, 1811854, 3189045, 4272117, 4701563, 424807, 4341116, 760545, 4674683, + 1538018, 386762, 194237, 2162719, 1694433, 943728, 2389036, 2196653, 3085571, 1513424, + 3689413, 3278747, 4197291, 3324063, 3651090, 1737936, 2768803, 2768889, 3108096, 4311775, + 3569480, 886705, 733256, 2477493, 1735412, 2960895, 1983781, 1861797, 3566460, 4537673, + 1164093, 3499764, 4553071, 3518985, 847658, 918948, 2922351, 1056144, 652895, 1013195, + 780505, 1702928, 3562838, 1432719, 2405207, 1054920, 641647, 2240939, 3617702, 383165, + 652641, 879593, 1810739, 2096385, 4497865, 4768530, 1743968, 3582014, 1025009, 3002122, + 2422190, 527647, 1251821, 2571153, 4095874, 3705333, 3637407, 1385567, 4043855, 4041930, + 2433139, 1710383, 1127734, 4362316, 711588, 817839, 3214775, 910077, 1313768, 2382229, + 16864, 2081770, 3095420, 3195272, 548711, 2259860, 1167323, 2435974, 425238, 2085179, + 2630042, 2632881, 2867923, 3703565, 1037695, 226617, 4379130, 1541468, 3581937, 605965, + 1137674, 4655221, 4769963, 1394370, 4425315, 2990132, 2364485, 1561137, 2713384, 481509, + 2900382, 934766, 2986774, 1767669, 298593, 2502539, 139296, 3794229, 4002180, 4718138, + 2909238, 423691, 3023810, 2784924, 2760160, 1971980, 316683, 3828090, 3253691, 4839313, + 1203624, 584938, 3901482, 1747543, 1572737, 3533226, 774708, 1691195, 1037110, 1557763, + 225120, 4424243, 3524086, 1717663, 4332507, 3513592, 4274932, 1232118, 873498, 1416042, + 2488925, 111391, 4704545, 4492545, 445317, 1584812, 2187737, 2471948, 3731678, 219255, + 2282627, 2589971, 2372185, 4609096, 3673961, 2524410, 12823, 2437155, 3015974, 4188352, + 3184084, 3690756, 1222341, 1278376, 3652030, 4162647, 326548, 3930062, 3926100, 1551222, + 2722165, 4526695, 3997534, 4815513, 3139056, 2547644, 3028915, 4149092, 3656554, 2691582, + 2676699, 1878842, 260174, 3129900, 4379993, 182347, 2189338, 3783616, 2616666, 2596952, + 243007, 4179282, 2730, 1939894, 2332032, 3335636, 182332, 3112260, 2174584, 587481, + 4527368, 3154106, 3403059, 673206, 2150292, 446521, 1600204, 4819428, 2591357, 48490, + 2917012, 2285923, 1072926, 2824281, 4364250, 956033, 311938, 37251, 3729300, 2726300, + 644966, 1623020, 1419070, 4646747, 2417222, 2680238, 2561083, 1793801, 2349366, 339747, + 611366, 4684147, 4356907, 1277161, 4510381, 3218352, 4161658, 3200733, 1172372, 3997786, + 3169266, 3353418, 2248955, 2875885, 2365369, 498208, 2968066, 2681505, 2059048, 2097106, + 3607540, 1121504, 2016789, 1762605, 3138431, 866081, 3705757, 3833066, 2599788, 760816, + 4046672, 1544367, 2983906, 4842911, 209599, 1250954, 3333704, 561212, 4674336, 2831841, + 3690724, 2929360, 4830834, 1177524, 2487687, 3525137, 875283, 651241, 2110742, 1296646, + 1543739, 4349417, 2384725, 1931751, 1519208, 1520034, 3385008, 3219962, 734912, 170230, + 1741419, 729913, 2860117, 2362381, 1199807, 2424230, 177824, 125948, 2722701, 4687548, + 1140771, 3232742, 4522020, 4376360, 1125603, 590312, 2481884, 138951, 4086775, 615155, + 3395781, 4587272, 283209, 568470, 4296185, 4344150, 2454321, 2672602, 838828, 4051647, + 1709120, 3074610, 693235, 4356087, 3018806, 239410, 
2431497, 691186, 766276, 4462126, + 859155, 2370304, 1571808, 1938673, 1694955, 3871296, 4245059, 3987376, 301524, 2512461, + 3410437, 3300380, 684922, 4581995, 3599557, 683515, 1850634, 3704678, 1937490, 2035591, + 3718533, 2065879, 3160765, 1467884, 1912241, 2501509, 3668572, 3390469, 2501150, 612319, + 713633, 1976262, 135946, 3641535, 632083, 13414, 4217765, 4137712, 2550250, 3281035, + 4179598, 961045, 2020694, 4380006, 1345936, 289162, 1359035, 770872, 4509911, 3947317, + 4719693, 248568, 2625660, 1237232, 2153208, 4814282, 1259954, 3677369, 861222, 2883506, + 3339149, 3998335, 491017, 1609022, 2648112, 742132, 649609, 4206953, 3131106, 3504814, + 3344486, 611721, 3215620, 2856233, 4447505, 1949222, 1868345, 712710, 6966, 4730666, + 3181872, 2972889, 3038521, 3525444, 4385208, 1845613, 1124187, 2030476, 4468651, 2478792, + 3473580, 3783357, 1852991, 1648485, 871319, 1670723, 4458328, 3218600, 1811100, 3443356, + 2233873, 3035207, 2548692, 3337891, 3773674, 1552957, 4782811, 3144712, 3523466, 1491315, + 3955852, 1838410, 3164028, 1092543, 776459, 2959379, 2541744, 4064418, 3908320, 2854145, + 3960709, 1348188, 977678, 853619, 1304291, 2848702, 1657913, 1319826, 3322665, 788037, + 2913686, 4471279, 1766285, 348304, 56570, 1892118, 4017244, 401006, 3524539, 4310134, + 1624693, 4081113, 957511, 849400, 129975, 2616130, 378537, 1556787, 3916162, 1039980, + 4407778, 2027690, 4213675, 839863, 683134, 75805, 2493150, 4215796, 81587, 751845, + 1255588, 1947964, 1950470, 859401, 3077088, 3931110, 2316256, 1523761, 4527477, 4237511, + 1123513, 4209796, 3584772, 4250563, 2091754, 1618766, 2139944, 4525352, 382159, 2955887, + 41760, 2313998, 496912, 3791570, 3904792, 3613654, 873959, 127076, 2537797, 2458107, + 4543265, 3661909, 26828, 271816, 17854, 2461269, 1776042, 1573899, 3409957, 4335712, + 4534313, 3392751, 1230124, 2159031, 4444015, 3373087, 3848014, 2026600, 1382747, 3537242, + 4536743, 4714155, 3788371, 3570849, 173741, 211962, 4377778, 119369, 2856973, 2945854, + 1508054, 4503932, 3141566, 1842177, 3448683, 3384614, 2886508, 1573965, 990618, 3053734, + 2918742, 4508753, 1032149, 60943, 4291620, 722607, 2883224, 169359, 4356585, 3725543, + 3678729, 341673, 3592828, 4077251, 3382936, 3885685, 4630994, 1286698, 4449616, 1138430, + 3113385, 4660578, 2539973, 4562286, 4085089, 494737, 3967610, 2130702, 1823755, 1369324, + 3796951, 956299, 141730, 935144, 4381893, 4412545, 1382250, 3024476, 2364546, 3396164, + 3573511, 314081, 577688, 4154135, 1567018, 4047761, 2446220, 1148833, 4842497, 3967186, + 1175290, 3749667, 1209593, 3295627, 3169065, 2460328, 1838486, 1436923, 2843887, 3676426, + 2079145, 2975635, 535071, 4287509, 3281107, 39606, 3115500, 3204573, 722131, 3124073}, + 2, + false))); +*/ CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/ms_bfs_test.cpp b/cpp/tests/experimental/ms_bfs_test.cpp new file mode 100644 index 00000000000..264382c22a3 --- /dev/null +++ b/cpp/tests/experimental/ms_bfs_test.cpp @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef struct MsBfs_Usecase_t { + std::string graph_file_full_path{}; + std::vector sources{}; + int32_t radius; + bool test_weighted{false}; + + MsBfs_Usecase_t(std::string const& graph_file_path, + std::vector const& sources, + int32_t radius, + bool test_weighted) + : sources(sources), radius(radius), test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} MsBfs_Usecase; + +class Tests_MsBfs : public ::testing::TestWithParam { + public: + Tests_MsBfs() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(MsBfs_Usecase const& configuration) + { + auto n_seeds = configuration.sources.size(); + int n_streams = std::min(n_seeds, static_cast(128)); + raft::handle_t handle(n_streams); + + cugraph::experimental::graph_t graph( + handle); + std::tie(graph, std::ignore) = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, false); + auto graph_view = graph.view(); + // Streams will allocate concurrently later + std::vector> d_distances{}; + std::vector> d_predecessors{}; + + d_distances.reserve(n_seeds); + d_predecessors.reserve(n_seeds); + for (vertex_t i = 0; i < n_seeds; i++) { + // Allocations and operations are attached to the worker stream + rmm::device_uvector tmp_distances(graph_view.get_number_of_vertices(), + handle.get_internal_stream_view(i)); + rmm::device_uvector tmp_predecessors(graph_view.get_number_of_vertices(), + handle.get_internal_stream_view(i)); + + d_distances.push_back(std::move(tmp_distances)); + d_predecessors.push_back(std::move(tmp_predecessors)); + } + + std::vector radius(n_seeds); + std::generate(radius.begin(), radius.end(), [n = 0]() mutable { return (n++ % 12 + 1); }); + + // warm up + cugraph::experimental::bfs(handle, + graph_view, + d_distances[0].begin(), + d_predecessors[0].begin(), + static_cast(configuration.sources[0]), + false, + radius[0]); + + // one by one + HighResTimer hr_timer; + hr_timer.start("bfs"); + cudaProfilerStart(); + for (vertex_t i = 0; i < n_seeds; i++) { + cugraph::experimental::bfs(handle, + graph_view, + d_distances[i].begin(), + d_predecessors[i].begin(), + static_cast(configuration.sources[i]), + false, + radius[i]); + } + cudaProfilerStop(); + hr_timer.stop(); + hr_timer.display(std::cout); + + // concurrent + hr_timer.start("bfs"); + cudaProfilerStart(); +#pragma omp parallel for + for (vertex_t i = 0; i < n_seeds; i++) { + raft::handle_t light_handle(handle, i); + auto worker_stream_view = light_handle.get_stream_view(); + cugraph::experimental::bfs(light_handle, + graph_view, + d_distances[i].begin(), + d_predecessors[i].begin(), + static_cast(configuration.sources[i]), + false, + radius[i]); + } + + cudaProfilerStop(); + hr_timer.stop(); + hr_timer.display(std::cout); + } +}; + +TEST_P(Tests_MsBfs, DISABLED_CheckInt32Int32FloatUntransposed) +{ + run_current_test(GetParam()); +} +/* +INSTANTIATE_TEST_CASE_P( + simple_test, + 
Tests_MsBfs, + ::testing::Values( + MsBfs_Usecase("test/datasets/karate.mtx", std::vector{0}, 1, false), + MsBfs_Usecase("test/datasets/karate.mtx", std::vector{0}, 2, false), + MsBfs_Usecase("test/datasets/karate.mtx", std::vector{1}, 3, false), + MsBfs_Usecase("test/datasets/karate.mtx", std::vector{10, 0, 5}, 2, false), + MsBfs_Usecase("test/datasets/karate.mtx", std::vector{9, 3, 10}, 2, false), + MsBfs_Usecase( + "test/datasets/karate.mtx", std::vector{5, 9, 3, 10, 12, 13}, 2, true))); +*/ +// For perf analysis + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_MsBfs, + ::testing::Values( + MsBfs_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector{363617}, 2, false), + MsBfs_Usecase( + "test/datasets/soc-LiveJournal1.mtx", + std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755}, + 2, + false), + MsBfs_Usecase( + "test/datasets/soc-LiveJournal1.mtx", + std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755, + 2536834, 3070144, 3888415, 3131712, 2382526, 1040771, 2631543, 4607218, 4465829, 3341686, + 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899, 1213033, 4840102, + 4529036, 3421116, 4264831, 4089751, 4272322, 3486998, 2830318, 320953, 2388331, 520808, + 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680, 847662, 3277365, 3957318, + 3455123, 2454259, 670953, 4465677, 1027332, 2560721, 89061, 1163406, 3109528, 3221856, + 4714426, 2382774, 37828, 4433616, 3283229, 591911, 4200188, 442522, 872207, 2437601, + 741003, 266241, 914618, 3626195, 2021080, 4679624, 777476, 2527796, 1114017, 640142, + 49259, 4069879, 3869098, 1105040, 4707804, 3208582, 3325885, 1450601, 4072548, 2037062, + 2029646, 4575891, 1488598, 79105, 4827273, 3795434, 4647518, 4733397, 3980718, 1184627}, + 2, + false), + MsBfs_Usecase( + "test/datasets/soc-LiveJournal1.mtx", + std::vector{ + 363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755, + 2536834, 3070144, 3888415, 3131712, 2382526, 1040771, 2631543, 4607218, 4465829, 3341686, + 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899, 1213033, 4840102, + 4529036, 3421116, 4264831, 4089751, 4272322, 3486998, 2830318, 320953, 2388331, 520808, + 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680, 847662, 3277365, 3957318, + 3455123, 2454259, 670953, 4465677, 1027332, 2560721, 89061, 1163406, 3109528, 3221856, + 4714426, 2382774, 37828, 4433616, 3283229, 591911, 4200188, 442522, 872207, 2437601, + 741003, 266241, 914618, 3626195, 2021080, 4679624, 777476, 2527796, 1114017, 640142, + 49259, 4069879, 3869098, 1105040, 4707804, 3208582, 3325885, 1450601, 4072548, 2037062, + 2029646, 4575891, 1488598, 79105, 4827273, 3795434, 4647518, 4733397, 3980718, 1184627, + 984983, 3114832, 1967741, 1599818, 144593, 2698770, 2889449, 2495550, 1053813, 1193622, + 686026, 3989015, 2040719, 4693428, 3190376, 2926728, 3399030, 1664419, 662429, 4526841, + 2186957, 3752558, 2440046, 2930226, 3633006, 4058166, 3137060, 3499296, 2126343, 148971, + 2199672, 275811, 2813976, 2274536, 1189239, 1335942, 2465624, 2596042, 829684, 193400, + 2682845, 3691697, 4022437, 4051170, 4195175, 2876420, 3984220, 2174475, 326134, 2606530, + 2493046, 4706121, 1498980, 4576225, 1271339, 44832, 1875673, 4664940, 134931, 736397, + 4333554, 2751031, 2163610, 2879676, 3174153, 3317403, 2052464, 1881883, 4757859, 3596257, + 2358088, 2578758, 447504, 590720, 1717038, 1869795, 1133885, 3027521, 840312, 2818881, + 3654321, 2730947, 
353585, 1134903, 2223378, 1508824, 3662521, 1363776, 2712071, 288441, + 1204581, 3502242, 4645567, 2767267, 1514366, 3956099, 1422145, 1216608, 2253360, 189132, + 4238225, 1345783, 451571, 1599442, 3237284, 4711405, 929446, 1857675, 150759, 1277633, + 761210, 138628, 1026833, 2599544, 2464737, 989203, 3399615, 2144292, 216142, 637312, + 2044964, 716256, 1660632, 1762919, 4784357, 2213415, 2764769, 291806, 609772, 3264819, + 1870953, 1516385, 235647, 1045474, 2664957, 819095, 1824119, 4045271, 4448109, 1676788, + 4285177, 1580502, 3546548, 2771971, 3927086, 1339779, 3156204, 1730998, 1172522, 2433024, + 4533449, 479930, 2010695, 672994, 3542039, 3176455, 26352, 2137735, 866910, 4410835, + 2623982, 3603159, 2555625, 2765653, 267865, 2015523, 1009052, 4713994, 1600667, 2176195, + 3179631, 4570390, 2018424, 3356384, 1784287, 894861, 3622099, 1647273, 3044136, 950354, + 1491760, 3416929, 3757300, 2244912, 4129215, 1600848, 3867343, 72329, 919189, 992521, + 3445975, 4712557, 4680974, 188419, 2612093, 1991268, 3566207, 2281468, 3859078, 2492806, + 3398628, 763441, 2679107, 2554420, 2130132, 4664374, 1182901, 3890770, 4714667, 4209303, + 4013060, 3617653, 2040022, 3296519, 4190671, 1693353, 2678411, 3788834, 2781815, 191965, + 1083926, 503974, 3529226, 1650522, 1900976, 542080, 3423929, 3418905, 878165, 4701703, + 3022790, 4316365, 76365, 4053672, 1358185, 3830478, 4445661, 3210024, 1895915, 4541133, + 2938808, 562788, 3920065, 1458776, 4052046, 2967475, 1092809, 3203538, 159626, 3399464, + 214467, 3343982, 1811854, 3189045, 4272117, 4701563, 424807, 4341116, 760545, 4674683, + 1538018, 386762, 194237, 2162719, 1694433, 943728, 2389036, 2196653, 3085571, 1513424, + 3689413, 3278747, 4197291, 3324063, 3651090, 1737936, 2768803, 2768889, 3108096, 4311775, + 3569480, 886705, 733256, 2477493, 1735412, 2960895, 1983781, 1861797, 3566460, 4537673, + 1164093, 3499764, 4553071, 3518985, 847658, 918948, 2922351, 1056144, 652895, 1013195, + 780505, 1702928, 3562838, 1432719, 2405207, 1054920, 641647, 2240939, 3617702, 383165, + 652641, 879593, 1810739, 2096385, 4497865, 4768530, 1743968, 3582014, 1025009, 3002122, + 2422190, 527647, 1251821, 2571153, 4095874, 3705333, 3637407, 1385567, 4043855, 4041930, + 2433139, 1710383, 1127734, 4362316, 711588, 817839, 3214775, 910077, 1313768, 2382229, + 16864, 2081770, 3095420, 3195272, 548711, 2259860, 1167323, 2435974, 425238, 2085179, + 2630042, 2632881, 2867923, 3703565, 1037695, 226617, 4379130, 1541468, 3581937, 605965, + 1137674, 4655221, 4769963, 1394370, 4425315, 2990132, 2364485, 1561137, 2713384, 481509, + 2900382, 934766, 2986774, 1767669, 298593, 2502539, 139296, 3794229, 4002180, 4718138, + 2909238, 423691, 3023810, 2784924, 2760160, 1971980, 316683, 3828090, 3253691, 4839313, + 1203624, 584938, 3901482, 1747543, 1572737, 3533226, 774708, 1691195, 1037110, 1557763, + 225120, 4424243, 3524086, 1717663, 4332507, 3513592, 4274932, 1232118, 873498, 1416042, + 2488925, 111391, 4704545, 4492545, 445317, 1584812, 2187737, 2471948, 3731678, 219255, + 2282627, 2589971, 2372185, 4609096, 3673961, 2524410, 12823, 2437155, 3015974, 4188352, + 3184084, 3690756, 1222341, 1278376, 3652030, 4162647, 326548, 3930062, 3926100, 1551222, + 2722165, 4526695, 3997534, 4815513, 3139056, 2547644, 3028915, 4149092, 3656554, 2691582, + 2676699, 1878842, 260174, 3129900, 4379993, 182347, 2189338, 3783616, 2616666, 2596952, + 243007, 4179282, 2730, 1939894, 2332032, 3335636, 182332, 3112260, 2174584, 587481, + 4527368, 3154106, 3403059, 673206, 2150292, 446521, 1600204, 4819428, 
2591357, 48490, + 2917012, 2285923, 1072926, 2824281, 4364250, 956033, 311938, 37251, 3729300, 2726300, + 644966, 1623020, 1419070, 4646747, 2417222, 2680238, 2561083, 1793801, 2349366, 339747, + 611366, 4684147, 4356907, 1277161, 4510381, 3218352, 4161658, 3200733, 1172372, 3997786, + 3169266, 3353418, 2248955, 2875885, 2365369, 498208, 2968066, 2681505, 2059048, 2097106, + 3607540, 1121504, 2016789, 1762605, 3138431, 866081, 3705757, 3833066, 2599788, 760816, + 4046672, 1544367, 2983906, 4842911, 209599, 1250954, 3333704, 561212, 4674336, 2831841, + 3690724, 2929360, 4830834, 1177524, 2487687, 3525137, 875283, 651241, 2110742, 1296646, + 1543739, 4349417, 2384725, 1931751, 1519208, 1520034, 3385008, 3219962, 734912, 170230, + 1741419, 729913, 2860117, 2362381, 1199807, 2424230, 177824, 125948, 2722701, 4687548, + 1140771, 3232742, 4522020, 4376360, 1125603, 590312, 2481884, 138951, 4086775, 615155, + 3395781, 4587272, 283209, 568470, 4296185, 4344150, 2454321, 2672602, 838828, 4051647, + 1709120, 3074610, 693235, 4356087, 3018806, 239410, 2431497, 691186, 766276, 4462126, + 859155, 2370304, 1571808, 1938673, 1694955, 3871296, 4245059, 3987376, 301524, 2512461, + 3410437, 3300380, 684922, 4581995, 3599557, 683515, 1850634, 3704678, 1937490, 2035591, + 3718533, 2065879, 3160765, 1467884, 1912241, 2501509, 3668572, 3390469, 2501150, 612319, + 713633, 1976262, 135946, 3641535, 632083, 13414, 4217765, 4137712, 2550250, 3281035, + 4179598, 961045, 2020694, 4380006, 1345936, 289162, 1359035, 770872, 4509911, 3947317, + 4719693, 248568, 2625660, 1237232, 2153208, 4814282, 1259954, 3677369, 861222, 2883506, + 3339149, 3998335, 491017, 1609022, 2648112, 742132, 649609, 4206953, 3131106, 3504814, + 3344486, 611721, 3215620, 2856233, 4447505, 1949222, 1868345, 712710, 6966, 4730666, + 3181872, 2972889, 3038521, 3525444, 4385208, 1845613, 1124187, 2030476, 4468651, 2478792, + 3473580, 3783357, 1852991, 1648485, 871319, 1670723, 4458328, 3218600, 1811100, 3443356, + 2233873, 3035207, 2548692, 3337891, 3773674, 1552957, 4782811, 3144712, 3523466, 1491315, + 3955852, 1838410, 3164028, 1092543, 776459, 2959379, 2541744, 4064418, 3908320, 2854145, + 3960709, 1348188, 977678, 853619, 1304291, 2848702, 1657913, 1319826, 3322665, 788037, + 2913686, 4471279, 1766285, 348304, 56570, 1892118, 4017244, 401006, 3524539, 4310134, + 1624693, 4081113, 957511, 849400, 129975, 2616130, 378537, 1556787, 3916162, 1039980, + 4407778, 2027690, 4213675, 839863, 683134, 75805, 2493150, 4215796, 81587, 751845, + 1255588, 1947964, 1950470, 859401, 3077088, 3931110, 2316256, 1523761, 4527477, 4237511, + 1123513, 4209796, 3584772, 4250563, 2091754, 1618766, 2139944, 4525352, 382159, 2955887, + 41760, 2313998, 496912, 3791570, 3904792, 3613654, 873959, 127076, 2537797, 2458107, + 4543265, 3661909, 26828, 271816, 17854, 2461269, 1776042, 1573899, 3409957, 4335712, + 4534313, 3392751, 1230124, 2159031, 4444015, 3373087, 3848014, 2026600, 1382747, 3537242, + 4536743, 4714155, 3788371, 3570849, 173741, 211962, 4377778, 119369, 2856973, 2945854, + 1508054, 4503932, 3141566, 1842177, 3448683, 3384614, 2886508, 1573965, 990618, 3053734, + 2918742, 4508753, 1032149, 60943, 4291620, 722607, 2883224, 169359, 4356585, 3725543, + 3678729, 341673, 3592828, 4077251, 3382936, 3885685, 4630994, 1286698, 4449616, 1138430, + 3113385, 4660578, 2539973, 4562286, 4085089, 494737, 3967610, 2130702, 1823755, 1369324, + 3796951, 956299, 141730, 935144, 4381893, 4412545, 1382250, 3024476, 2364546, 3396164, + 3573511, 314081, 577688, 4154135, 1567018, 
4047761, 2446220, 1148833, 4842497, 3967186, + 1175290, 3749667, 1209593, 3295627, 3169065, 2460328, 1838486, 1436923, 2843887, 3676426, + 2079145, 2975635, 535071, 4287509, 3281107, 39606, 3115500, 3204573, 722131, 3124073}, + 2, + false))); +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/streams.cu b/cpp/tests/experimental/streams.cu new file mode 100644 index 00000000000..c89ffe1e532 --- /dev/null +++ b/cpp/tests/experimental/streams.cu @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include "gtest/gtest.h" +struct StreamTest : public ::testing::Test { +}; +TEST_F(StreamTest, basic_test) +{ + int n_streams = 4; + raft::handle_t handle(n_streams); + + const size_t intput_size = 4096; + +#pragma omp parallel for + for (int i = 0; i < n_streams; i++) { + rmm::device_uvector u(intput_size, handle.get_internal_stream_view(i)), + v(intput_size, handle.get_internal_stream_view(i)); + thrust::transform(rmm::exec_policy(handle.get_internal_stream_view(i)), + u.begin(), + u.end(), + v.begin(), + v.begin(), + 2 * thrust::placeholders::_1 + thrust::placeholders::_2); + } +} \ No newline at end of file diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index 8a847d1f1d4..11ba2d6ef96 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -82,7 +82,9 @@ shortest_path, filter_unreachable, shortest_path_length, - traveling_salesperson + traveling_salesperson, + concurrent_bfs, + multi_source_bfs, ) from cugraph.tree import minimum_spanning_tree, maximum_spanning_tree diff --git a/python/cugraph/tests/test_egonet.py b/python/cugraph/tests/test_egonet.py index 009fd1252f1..b259c2567dc 100644 --- a/python/cugraph/tests/test_egonet.py +++ b/python/cugraph/tests/test_egonet.py @@ -58,29 +58,6 @@ def test_ego_graph_nx(graph_file, seed, radius): @pytest.mark.parametrize("seeds", [[0, 5, 13]]) @pytest.mark.parametrize("radius", [1, 2, 3]) def test_batched_ego_graphs(graph_file, seeds, radius): - """ - Compute the induced subgraph of neighbors for each node in seeds - within a given radius. - Parameters - ---------- - G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix - Graph or matrix object, which should contain the connectivity - information. Edge weights, if present, should be single or double - precision floating point values. - seeds : cudf.Series - Specifies the seeds of the induced egonet subgraphs - radius: integer, optional - Include all neighbors of distance<=radius from n. - - Returns - ------- - ego_edge_lists : cudf.DataFrame - GPU data frame containing all induced sources identifiers, - destination identifiers, edge weights - seeds_offsets: cudf.Series - Series containing the starting offset in the returned edge list - for each seed. 
- """ gc.collect() # Nx @@ -93,9 +70,8 @@ def test_batched_ego_graphs(graph_file, seeds, radius): df, offsets = cugraph.batched_ego_graphs(Gnx, seeds, radius=radius) for i in range(len(seeds)): ego_nx = nx.ego_graph(Gnx, seeds[i], radius=radius) - ego_df = df[offsets[i]:offsets[i+1]] - ego_cugraph = nx.from_pandas_edgelist(ego_df, - source="src", - target="dst", - edge_attr="weight") + ego_df = df[offsets[i]:offsets[i + 1]] + ego_cugraph = nx.from_pandas_edgelist( + ego_df, source="src", target="dst", edge_attr="weight" + ) assert nx.is_isomorphic(ego_nx, ego_cugraph) diff --git a/python/cugraph/traversal/__init__.py b/python/cugraph/traversal/__init__.py index 5944ebe0865..e74266d29fc 100644 --- a/python/cugraph/traversal/__init__.py +++ b/python/cugraph/traversal/__init__.py @@ -17,6 +17,8 @@ sssp, shortest_path, filter_unreachable, - shortest_path_length + shortest_path_length, ) from cugraph.traversal.traveling_salesperson import traveling_salesperson + +from cugraph.traversal.ms_bfs import concurrent_bfs, multi_source_bfs diff --git a/python/cugraph/traversal/ms_bfs.py b/python/cugraph/traversal/ms_bfs.py new file mode 100644 index 00000000000..e4b799e30e4 --- /dev/null +++ b/python/cugraph/traversal/ms_bfs.py @@ -0,0 +1,282 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import cudf + +# from cugraph.structure.graph import Graph, DiGraph +# from cugraph.utilities.utils import get_device_memory_info +import warnings + + +def _get_feasibility(G, sources, components=None, depth_limit=None): + """ + Evaluate the feasibility for breadth first traversal from multiple sources + in a graph. + + Parameters + ---------- + G : cugraph.Graph or cugraph.DiGraph + The adjacency list will be computed if not already present. + + sources : cudf.Series + Subset of vertices from which the traversals start. A BFS is run for + each source in the Series. + The size of the series should be at least one and cannot exceed + the size of the graph. + + depth_limit : Integer, optional, default=None + Limit the depth of the search. Terminates if no more vertices are + reachable within the distance of depth_limit + + components : cudf.DataFrame, optional, default=None + GPU Dataframe containing the component information. + Passing this information may impact the return type. + When no component information is passed BFS uses one component + behavior settings. + + components['vertex'] : cudf.Series + vertex IDs + components['color'] : cudf.Series + component IDs/color for vertices. 
+ + Returns + ------- + mem_footprint : integer + Estimated memory foot print size in Bytes + """ + + # Fixme not implemented in RMM yet + # using 96GB upper bound for now + # mem = get_device_memory_info() + mem = 9.6e10 + n_sources = sources.size + V = G.number_of_vertices() + E = G.number_of_edges() + mean_component_sz = V + n_components = 1 + + # Retreive types + size_of_v = 4 + size_of_e = 4 + size_of_w = 0 + if G.adjlist.weights is not None: + if G.adjlist.weights.dtype is np.float64: + size_of_w = 8 + else: + size_of_w = 4 + if G.adjlist.offsets.dtype is np.float64: + size_of_v = 8 + if G.adjlist.indices.dtype is np.float64: + size_of_e = 8 + + # Graph size + G_sz = E * size_of_e + E * size_of_w + V * size_of_v + + # The impact of depth limit depends on the sparsity + # pattern and diameter. We cannot leverage it without + # traversing the full dataset a the moment. + + # dense output + output_sz = n_sources * 2 * V * size_of_v + + # sparse output + if components is not None: + tmp = components["color"].value_counts() + n_components = tmp.size + if n_sources / n_components > 100: + warnings.warn( + "High number of seeds per component result in large output." + ) + mean_component_sz = tmp.mean() + output_sz = mean_component_sz * n_sources * 2 * size_of_e + + # counting 10% for context, handle and temporary allocations + mem_footprint = (G_sz + output_sz) * 1.1 + if mem_footprint > mem: + warnings.warn(f"Cannot execute in-memory :{mem_footprint} Bytes") + + return mem_footprint + + +def concurrent_bfs(Graphs, sources, depth_limit=None, offload=False): + """ + Find the breadth first traversals of multiple graphs with multiple sources + in each graph. + + Parameters + ---------- + Graphs : list of cugraph.Graph or cugraph.DiGraph + The adjacency lists will be computed if not already present. + + sources : list of cudf.Series + For each graph, subset of vertices from which the traversals start. + A BFS is run in Graphs[i] for each source in the Series at sources[i]. + The size of this list must match the size of the graph list. + The size of each Series (ie. the number of sources per graph) + is flexible, but cannot exceed the size of the corresponding graph. + + + depth_limit : Integer, optional, default=None + Limit the depth of the search. Terminates if no more vertices are + reachable within the distance of depth_limit + + offload : boolean, optional, default=False + Indicates if output should be written to the disk. + When not provided, the algorithms decides if offloading is needed + based on the input parameters. + + Returns + ------- + Return type is decided based on the input parameters (size of + sources, size of the graph, number of graphs and offload setting) + + If G is a cugraph.Graph and output fits in memory: + BFS_edge_lists : cudf.DataFrame + GPU data frame containing all BFS edges + source_offsets: cudf.Series + Series containing the starting offset in the returned edge list + for each source. + + If offload is True, or if the output does not fit in memory : + Writes csv files containing BFS output to the disk. + """ + raise NotImplementedError( + "concurrent_bfs is coming soon! 
Please up vote the github issue 1465\ + to help us prioritize" + ) + if not isinstance(Graphs, list): + raise TypeError( + "Graphs should be a list of cugraph.Graph or cugraph.DiGraph" + ) + if not isinstance(sources, list): + raise TypeError("sources should be a list of cudf.Series") + if len(Graphs) != len(sources): + raise ValueError( + "The size of the sources list must match\ + the size of the graph list." + ) + if offload is True: + raise NotImplementedError( + "Offloading is coming soon! Please up vote the github issue 1461\ + to help us prioritize" + ) + + # Consolidate graphs in a single graph and record components + + # Renumber and concatenate sources in a single df + + # Call multi_source_bfs + # multi_source_bfs( + # G, + # sources, + # components=components, + # depth_limit=depth_limit, + # offload=offload, + # ) + + +def multi_source_bfs( + G, sources, components=None, depth_limit=None, offload=False +): + """ + Find the breadth first traversal from multiple sources in a graph. + + Parameters + ---------- + G : cugraph.Graph or cugraph.DiGraph + The adjacency list will be computed if not already present. + + sources : cudf.Series + Subset of vertices from which the traversals start. A BFS is run for + each source in the Series. + The size of the series should be at least one and cannot exceed the + size of the graph. + + depth_limit : Integer, optional, default=None + Limit the depth of the search. Terminates if no more vertices are + reachable within the distance of depth_limit + + components : cudf.DataFrame, optional, default=None + GPU Dataframe containing the component information. + Passing this information may impact the return type. + When no component information is passed BFS uses one component + behavior settings. + + components['vertex'] : cudf.Series + vertex IDs + components['color'] : cudf.Series + component IDs/color for vertices. + + offload : boolean, optional, default=False + Indicates if output should be written to the disk. + When not provided, the algorithms decides if offloading is needed + based on the input parameters. + + Returns + ------- + Return value type is decided based on the input parameters (size of + sources, size of the graph, number of components and offload setting) + If G is a cugraph.Graph, returns : + cudf.DataFrame + df['vertex'] vertex IDs + + df['distance_'] path distance for each vertex from the + starting vertex. One column per source. + + df['predecessor_'] for each i'th position in the column, + the vertex ID immediately preceding the vertex at position i in + the 'vertex' column. One column per source. + + If G is a cugraph.Graph and component information is present returns : + BFS_edge_lists : cudf.DataFrame + GPU data frame containing all BFS edges + source_offsets: cudf.Series + Series containing the starting offset in the returned edge list + for each source. + + If offload is True, or if the output does not fit in memory : + Writes csv files containing BFS output to the disk. + """ + raise NotImplementedError( + "concurrent_bfs is coming soon! Please up vote the github issue 1465\ + to help us prioritize" + ) + # if components is not None: + # null_check(components["vertex"]) + # null_check(components["colors"]) + # + # if depth_limit is not None: + # raise NotImplementedError( + # "depth limit implementation of BFS is not currently supported" + # ) + + # if offload is True: + # raise NotImplementedError( + # "Offloading is coming soon! 
Please up vote the github issue 1461 + # to help us prioritize" + # ) + if isinstance(sources, list): + sources = cudf.Series(sources) + if G.renumbered is True: + sources = G.lookup_internal_vertex_id(cudf.Series(sources)) + if not G.adjlist: + G.view_adj_list() + # Memory footprint check + footprint = _get_feasibility( + G, sources, components=components, depth_limit=depth_limit + ) + print(footprint) + # Call multi_source_bfs + # FIXME remove when implemented + # raise NotImplementedError("Commming soon") diff --git a/python/cugraph/utilities/utils.py b/python/cugraph/utilities/utils.py index 39b789d7f79..adaec0f9e44 100644 --- a/python/cugraph/utilities/utils.py +++ b/python/cugraph/utilities/utils.py @@ -26,6 +26,7 @@ from cupyx.scipy.sparse.coo import coo_matrix as cp_coo_matrix from cupyx.scipy.sparse.csr import csr_matrix as cp_csr_matrix from cupyx.scipy.sparse.csc import csc_matrix as cp_csc_matrix + CP_MATRIX_TYPES = [cp_coo_matrix, cp_csr_matrix, cp_csc_matrix] CP_COMPRESSED_MATRIX_TYPES = [cp_csr_matrix, cp_csc_matrix] except ModuleNotFoundError: @@ -38,6 +39,7 @@ from scipy.sparse.coo import coo_matrix as sp_coo_matrix from scipy.sparse.csr import csr_matrix as sp_csr_matrix from scipy.sparse.csc import csc_matrix as sp_csc_matrix + SP_MATRIX_TYPES = [sp_coo_matrix, sp_csr_matrix, sp_csc_matrix] SP_COMPRESSED_MATRIX_TYPES = [sp_csr_matrix, sp_csc_matrix] except ModuleNotFoundError: @@ -80,15 +82,21 @@ def get_traversed_path(df, id): >>> path = cugraph.utils.get_traversed_path(sssp_df, 32) """ - if 'vertex' not in df.columns: - raise ValueError("DataFrame does not appear to be a BFS or " - "SSP result - 'vertex' column missing") - if 'distance' not in df.columns: - raise ValueError("DataFrame does not appear to be a BFS or " - "SSP result - 'distance' column missing") - if 'predecessor' not in df.columns: - raise ValueError("DataFrame does not appear to be a BFS or " - "SSP result - 'predecessor' column missing") + if "vertex" not in df.columns: + raise ValueError( + "DataFrame does not appear to be a BFS or " + "SSP result - 'vertex' column missing" + ) + if "distance" not in df.columns: + raise ValueError( + "DataFrame does not appear to be a BFS or " + "SSP result - 'distance' column missing" + ) + if "predecessor" not in df.columns: + raise ValueError( + "DataFrame does not appear to be a BFS or " + "SSP result - 'predecessor' column missing" + ) if type(id) != int: raise ValueError("The vertex 'id' needs to be an integer") @@ -96,17 +104,17 @@ def get_traversed_path(df, id): # or edited. 
Therefore we cannot assume that using the vertex ID # as an index will work - ddf = df[df['vertex'] == id] + ddf = df[df["vertex"] == id] if len(ddf) == 0: raise ValueError("The vertex (", id, " is not in the result set") - pred = ddf['predecessor'].iloc[0] + pred = ddf["predecessor"].iloc[0] answer = [] answer.append(ddf) while pred != -1: - ddf = df[df['vertex'] == pred] - pred = ddf['predecessor'].iloc[0] + ddf = df[df["vertex"] == pred] + pred = ddf["predecessor"].iloc[0] answer.append(ddf) return cudf.concat(answer) @@ -138,15 +146,21 @@ def get_traversed_path_list(df, id): >>> path = cugraph.utils.get_traversed_path_list(sssp_df, 32) """ - if 'vertex' not in df.columns: - raise ValueError("DataFrame does not appear to be a BFS or " - "SSP result - 'vertex' column missing") - if 'distance' not in df.columns: - raise ValueError("DataFrame does not appear to be a BFS or " - "SSP result - 'distance' column missing") - if 'predecessor' not in df.columns: - raise ValueError("DataFrame does not appear to be a BFS or " - "SSP result - 'predecessor' column missing") + if "vertex" not in df.columns: + raise ValueError( + "DataFrame does not appear to be a BFS or " + "SSP result - 'vertex' column missing" + ) + if "distance" not in df.columns: + raise ValueError( + "DataFrame does not appear to be a BFS or " + "SSP result - 'distance' column missing" + ) + if "predecessor" not in df.columns: + raise ValueError( + "DataFrame does not appear to be a BFS or " + "SSP result - 'predecessor' column missing" + ) if type(id) != int: raise ValueError("The vertex 'id' needs to be an integer") @@ -158,17 +172,17 @@ def get_traversed_path_list(df, id): answer = [] answer.append(id) - ddf = df[df['vertex'] == id] + ddf = df[df["vertex"] == id] if len(ddf) == 0: raise ValueError("The vertex (", id, " is not in the result set") - pred = ddf['predecessor'].iloc[0] + pred = ddf["predecessor"].iloc[0] while pred != -1: answer.append(pred) - ddf = df[df['vertex'] == pred] - pred = ddf['predecessor'].iloc[0] + ddf = df[df["vertex"] == pred] + pred = ddf["predecessor"].iloc[0] return answer @@ -206,6 +220,14 @@ def is_device_version_less_than(min_version=(7, 0)): return False +def get_device_memory_info(): + """ + Returns the total amount of global memory on the device in bytes + """ + meminfo = cuda.current_context().get_memory_info() + return meminfo[1] + + # FIXME: if G is a Nx type, the weight attribute is assumed to be "weight", if # set. An additional optional parameter for the weight attr name when accepting # Nx graphs may be needed. 
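For reference, the `get_traversed_path` and `get_traversed_path_list` helpers above both rebuild a path by starting at the requested vertex and following the 'predecessor' column until the -1 sentinel is reached. A minimal sketch of the call pattern they support, using the cugraph/cudf entry points named in their docstrings (illustrative only, not part of this diff):

    import cudf
    import cugraph

    # Hypothetical 4-vertex chain 0 -> 1 -> 2 -> 3 with unit weights.
    edges = cudf.DataFrame({"src": [0, 1, 2],
                            "dst": [1, 2, 3],
                            "wt": [1.0, 1.0, 1.0]})
    G = cugraph.Graph()
    G.from_cudf_edgelist(edges, source="src", destination="dst", edge_attr="wt")

    # sssp returns a frame with 'vertex', 'distance' and 'predecessor' columns,
    # which is exactly the shape both helpers validate.
    sssp_df = cugraph.sssp(G, 0)

    # Walk predecessors back from vertex 3 until -1 is reached.
    path_df = cugraph.utils.get_traversed_path(sssp_df, 3)        # cudf.DataFrame
    path_ids = cugraph.utils.get_traversed_path_list(sssp_df, 3)  # [3, 2, 1, 0]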
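Similarly, the new `get_device_memory_info` above returns the second element of Numba's (free, total) tuple, i.e. the total global memory of the current device in bytes. A small sketch of the kind of feasibility check it enables (`estimated_bytes` is a made-up placeholder, not a cuGraph parameter):

    from numba import cuda

    def fits_on_device(estimated_bytes):
        # get_memory_info() returns (free, total) in bytes; index 1 is the
        # total, which is what get_device_memory_info above reports.
        free_bytes, total_bytes = cuda.current_context().get_memory_info()
        return estimated_bytes < total_bytes

    print(fits_on_device(10 * 1024 ** 2))  # check a ~10 MiB estimate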
From the Nx docs: @@ -229,29 +251,35 @@ def ensure_cugraph_obj(obj, nx_weight_attr=None, matrix_graph_type=None): elif (nx is not None) and (input_type in [nx.Graph, nx.DiGraph]): return (convert_from_nx(obj, weight=nx_weight_attr), input_type) - elif (input_type in CP_MATRIX_TYPES) or \ - (input_type in SP_MATRIX_TYPES): + elif (input_type in CP_MATRIX_TYPES) or (input_type in SP_MATRIX_TYPES): if matrix_graph_type is None: matrix_graph_type = Graph elif matrix_graph_type not in [Graph, DiGraph]: - raise TypeError(f"matrix_graph_type must be either a cugraph " - f"Graph or DiGraph, got: {matrix_graph_type}") - - if input_type in (CP_COMPRESSED_MATRIX_TYPES + - SP_COMPRESSED_MATRIX_TYPES): + raise TypeError( + f"matrix_graph_type must be either a cugraph " + f"Graph or DiGraph, got: {matrix_graph_type}" + ) + + if input_type in ( + CP_COMPRESSED_MATRIX_TYPES + SP_COMPRESSED_MATRIX_TYPES + ): coo = obj.tocoo(copy=False) else: coo = obj if input_type in CP_MATRIX_TYPES: - df = cudf.DataFrame({"source": cp.ascontiguousarray(coo.row), - "destination": cp.ascontiguousarray(coo.col), - "weight": cp.ascontiguousarray(coo.data)}) + df = cudf.DataFrame( + { + "source": cp.ascontiguousarray(coo.row), + "destination": cp.ascontiguousarray(coo.col), + "weight": cp.ascontiguousarray(coo.data), + } + ) else: - df = cudf.DataFrame({"source": coo.row, - "destination": coo.col, - "weight": coo.data}) + df = cudf.DataFrame( + {"source": coo.row, "destination": coo.col, "weight": coo.data} + ) # FIXME: # * do a quick check that symmetry is stored explicitly in the cupy # data for sym matrices (ie. for each uv, check vu is there) From ce807985c6ebed409485ba46a61291d92eb0ed9b Mon Sep 17 00:00:00 2001 From: Mike Wendt <1915404+mike-wendt@users.noreply.github.com> Date: Mon, 29 Mar 2021 12:26:34 -0400 Subject: [PATCH 32/51] ENH Update conda recipes pinning of repo dependencies (#1485) Ensure all conda packages created in this repo that depend on other packages are all version pinned to the same build number. This way it prevents a conda solve from picking mismatched versions of `cugraph` and `libcugraph` that can break this repo during builds and testing. Authors: - Mike Wendt (@mike-wendt) Approvers: - Brad Rees (@BradReesWork) - Ray Douglass (@raydouglass) URL: https://github.com/rapidsai/cugraph/pull/1485 --- conda/recipes/cugraph/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index 1ef64ddbe72..4b845583181 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -25,13 +25,13 @@ requirements: build: - python x.x - cython>=0.29,<0.30 - - libcugraph={{ version }} + - libcugraph={{ version }}=*_{{ GIT_DESCRIBE_NUMBER }} - cudf={{ minor_version }} - ucx-py {{ minor_version }} - ucx-proc=*=gpu run: - python x.x - - libcugraph={{ version }} + - libcugraph={{ version }}=*_{{ GIT_DESCRIBE_NUMBER }} - cudf={{ minor_version }} - dask-cudf {{ minor_version }} - dask-cuda {{ minor_version }} From e60d9f7744e7c4cef70b7cddb3392c0e6f83936d Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Tue, 30 Mar 2021 10:19:56 -0400 Subject: [PATCH 33/51] Update Louvain to use new graph primitives and pattern accelerators (#1423) Implement the `update_by_delta_modularity` method using the new graph primitives and pattern accelerators. 
This eliminates all of the custom MNMG implementation originally created for MNMG Louvain a few releases ago and replaces it with the new pattern accelerator and graph primitives that have been added in the last couple of releases. This depends on the following PRs and should not be merged until after them: * #1394 * #1399 closes #1220 Authors: - Chuck Hastings (@ChuckHastings) Approvers: - Andrei Schaffer (@aschaffer) - Seunghwa Kang (@seunghwak) - Rick Ratzel (@rlratzel) - Alex Fender (@afender) URL: https://github.com/rapidsai/cugraph/pull/1423 --- cpp/include/algorithms.hpp | 79 +- .../dendrogram.cuh => include/dendrogram.hpp} | 39 +- .../experimental/include_cuco_static_map.cuh | 0 cpp/include/graph.hpp | 6 +- ...ransform_reduce_key_aggregated_out_nbr.cuh | 2 +- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 2 - cpp/include/utilities/collect_comm.cuh | 2 +- cpp/include/utilities/device_comm.cuh | 18 +- cpp/src/community/ecg.cu | 2 +- cpp/src/community/flatten_dendrogram.cuh | 39 +- cpp/src/community/leiden.cuh | 2 +- cpp/src/community/louvain.cu | 191 ++- cpp/src/community/louvain.cuh | 10 +- cpp/src/experimental/louvain.cuh | 1382 ++++------------- cpp/src/experimental/shuffle.cuh | 226 --- cpp/tests/CMakeLists.txt | 19 +- cpp/tests/community/louvain_test.cpp | 176 ++- cpp/tests/community/mg_louvain_helper.cu | 353 +++++ cpp/tests/community/mg_louvain_helper.hpp | 53 + cpp/tests/community/mg_louvain_test.cpp | 233 +++ cpp/tests/experimental/louvain_test.cu | 133 -- cpp/tests/utilities/base_fixture.hpp | 5 + 22 files changed, 1415 insertions(+), 1557 deletions(-) rename cpp/{src/community/dendrogram.cuh => include/dendrogram.hpp} (55%) rename cpp/{src => include}/experimental/include_cuco_static_map.cuh (100%) delete mode 100644 cpp/src/experimental/shuffle.cuh create mode 100644 cpp/tests/community/mg_louvain_helper.cu create mode 100644 cpp/tests/community/mg_louvain_helper.hpp create mode 100644 cpp/tests/community/mg_louvain_test.cpp delete mode 100644 cpp/tests/experimental/louvain_test.cu diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index 8a5474b389c..b8706d81e21 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -14,10 +14,14 @@ * limitations under the License. */ #pragma once + +#include #include #include + #include #include + #include namespace cugraph { @@ -612,7 +616,7 @@ weight_t hungarian(raft::handle_t const &handle, * * @throws cugraph::logic_error when an error occurs. * - * @tparam graph_t Type of graph + * @tparam graph_view_t Type of graph * * @param[in] handle Library handle (RAFT). 
If a communicator is set in the handle, * @param[in] graph input graph object (CSR) @@ -629,13 +633,74 @@ weight_t hungarian(raft::handle_t const &handle, * 2) modularity of the returned clustering * */ -template -std::pair louvain( +template +std::pair louvain( raft::handle_t const &handle, - graph_t const &graph, - typename graph_t::vertex_type *clustering, - size_t max_level = 100, - typename graph_t::weight_type resolution = typename graph_t::weight_type{1}); + graph_view_t const &graph_view, + typename graph_view_t::vertex_type *clustering, + size_t max_level = 100, + typename graph_view_t::weight_type resolution = typename graph_view_t::weight_type{1}); + +/** + * @brief Louvain implementation, returning dendrogram + * + * Compute a clustering of the graph by maximizing modularity + * + * Computed using the Louvain method described in: + * + * VD Blondel, J-L Guillaume, R Lambiotte and E Lefebvre: Fast unfolding of + * community hierarchies in large networks, J Stat Mech P10008 (2008), + * http://arxiv.org/abs/0803.0476 + * + * @throws cugraph::logic_error when an error occurs. + * + * @tparam graph_view_t Type of graph + * + * @param[in] handle Library handle (RAFT) + * @param[in] graph_view Input graph view object (CSR) + * @param[in] max_level (optional) maximum number of levels to run (default 100) + * @param[in] resolution (optional) The value of the resolution parameter to use. + * Called gamma in the modularity formula, this changes the size + * of the communities. Higher resolutions lead to more smaller + * communities, lower resolutions lead to fewer larger + * communities. (default 1) + * + * @return a pair containing: + * 1) unique pointer to dendrogram + * 2) modularity of the returned clustering + * + */ +template +std::pair>, + typename graph_view_t::weight_type> +louvain(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t max_level = 100, + typename graph_view_t::weight_type resolution = typename graph_view_t::weight_type{1}); + +/** + * @brief Flatten a Dendrogram at a particular level + * + * A Dendrogram represents a hierarchical clustering/partitioning of + * a graph. This function will flatten the hierarchical clustering into + * a label for each vertex representing the final cluster/partition to + * which it is assigned + * + * @throws cugraph::logic_error when an error occurs. + * + * @tparam graph_view_t Type of graph + * + * @param[in] handle Library handle (RAFT). 
If a communicator is set in the handle, + * @param[in] graph input graph object + * @param[in] dendrogram input dendrogram object + * @param[out] clustering Pointer to device array where the clustering should be stored + * + */ +template +void flatten_dendrogram(raft::handle_t const &handle, + graph_view_t const &graph_view, + Dendrogram const &dendrogram, + typename graph_view_t::vertex_type *clustering); /** * @brief Leiden implementation diff --git a/cpp/src/community/dendrogram.cuh b/cpp/include/dendrogram.hpp similarity index 55% rename from cpp/src/community/dendrogram.cuh rename to cpp/include/dendrogram.hpp index 414f5f3854d..bb9ba470a52 100644 --- a/cpp/src/community/dendrogram.cuh +++ b/cpp/include/dendrogram.hpp @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include @@ -25,30 +25,26 @@ namespace cugraph { template class Dendrogram { public: - void add_level(vertex_t num_verts, + void add_level(vertex_t first_index, + vertex_t num_verts, cudaStream_t stream = 0, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) { - level_ptr_.push_back( - std::make_unique(num_verts * sizeof(vertex_t), stream, mr)); - level_size_.push_back(num_verts); + level_ptr_.push_back(std::make_unique>(num_verts, stream, mr)); + level_first_index_.push_back(first_index); } - size_t current_level() const { return level_size_.size() - 1; } + size_t current_level() const { return level_ptr_.size() - 1; } - size_t num_levels() const { return level_size_.size(); } + size_t num_levels() const { return level_ptr_.size(); } - vertex_t const *get_level_ptr_nocheck(size_t level) const - { - return static_cast(level_ptr_[level]->data()); - } + vertex_t const *get_level_ptr_nocheck(size_t level) const { return level_ptr_[level]->data(); } - vertex_t *get_level_ptr_nocheck(size_t level) - { - return static_cast(level_ptr_[level]->data()); - } + vertex_t *get_level_ptr_nocheck(size_t level) { return level_ptr_[level]->data(); } - vertex_t get_level_size_nocheck(size_t level) const { return level_size_[level]; } + size_t get_level_size_nocheck(size_t level) const { return level_ptr_[level]->size(); } + + vertex_t get_level_first_index_nocheck(size_t level) const { return level_first_index_[level]; } vertex_t const *current_level_begin() const { return get_level_ptr_nocheck(current_level()); } @@ -58,11 +54,16 @@ class Dendrogram { vertex_t *current_level_end() { return current_level_begin() + current_level_size(); } - vertex_t current_level_size() const { return get_level_size_nocheck(current_level()); } + size_t current_level_size() const { return get_level_size_nocheck(current_level()); } + + vertex_t current_level_first_index() const + { + return get_level_first_index_nocheck(current_level()); + } private: - std::vector level_size_; - std::vector> level_ptr_; + std::vector level_first_index_; + std::vector>> level_ptr_; }; } // namespace cugraph diff --git a/cpp/src/experimental/include_cuco_static_map.cuh b/cpp/include/experimental/include_cuco_static_map.cuh similarity index 100% rename from cpp/src/experimental/include_cuco_static_map.cuh rename to cpp/include/experimental/include_cuco_static_map.cuh diff --git a/cpp/include/graph.hpp b/cpp/include/graph.hpp index b30159566b5..8ea58546ce1 100644 --- a/cpp/include/graph.hpp +++ b/cpp/include/graph.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,6 +69,10 @@ class GraphViewBase { edge_t *local_edges; vertex_t *local_offsets; + vertex_t get_number_of_vertices() const { return number_of_vertices; } + + vertex_t get_local_vertex_first() const { return vertex_t{0}; } + /** * @brief Fill the identifiers array with the vertex identifiers. * diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 11cf2cb1137..19a5f67c9de 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -27,7 +27,7 @@ #include -#include +#include #include diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index 0b3588bc8c5..e621ed91ddb 100644 --- a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -25,8 +25,6 @@ #include -#include - #include namespace cugraph { diff --git a/cpp/include/utilities/collect_comm.cuh b/cpp/include/utilities/collect_comm.cuh index 5ca58ebeb17..8d2227c0f60 100644 --- a/cpp/include/utilities/collect_comm.cuh +++ b/cpp/include/utilities/collect_comm.cuh @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -24,7 +25,6 @@ #include #include -#include #include #include diff --git a/cpp/include/utilities/device_comm.cuh b/cpp/include/utilities/device_comm.cuh index 7b9956902cc..53711f21a6c 100644 --- a/cpp/include/utilities/device_comm.cuh +++ b/cpp/include/utilities/device_comm.cuh @@ -238,10 +238,12 @@ template struct device_sendrecv_tuple_iterator_element_impl { void run(raft::comms::comms_t const& comm, InputIterator input_first, - size_t count, + size_t tx_count, int dst, - int base_tag, - raft::comms::request_t* requests) const + OutputIterator output_first, + size_t rx_count, + int src, + cudaStream_t stream) const { } }; @@ -460,7 +462,7 @@ struct device_reduce_tuple_iterator_element_impl { op, root, stream); - device_reduce_tuple_iterator_element_impl( + device_reduce_tuple_iterator_element_impl().run( comm, input_first, output_first, count, op, root, stream); } }; @@ -889,9 +891,11 @@ device_reduce(raft::comms::comms_t const& comm, size_t constexpr tuple_size = thrust::tuple_size::value_type>::value; - detail:: - device_reduce_tuple_iterator_element_impl( - comm, input_first, output_first, count, op, root, stream); + detail::device_reduce_tuple_iterator_element_impl() + .run(comm, input_first, output_first, count, op, root, stream); } template diff --git a/cpp/src/community/ecg.cu b/cpp/src/community/ecg.cu index 994204ecd32..45f7d723191 100644 --- a/cpp/src/community/ecg.cu +++ b/cpp/src/community/ecg.cu @@ -117,7 +117,7 @@ class EcgLouvain : public cugraph::Louvain { void initialize_dendrogram_level(vertex_t num_vertices) override { - this->dendrogram_->add_level(num_vertices); + this->dendrogram_->add_level(0, num_vertices); get_permutation_vector( num_vertices, seed_, this->dendrogram_->current_level_begin(), this->stream_); diff --git a/cpp/src/community/flatten_dendrogram.cuh b/cpp/src/community/flatten_dendrogram.cuh index 892fe2d1c51..6d455a68192 100644 --- a/cpp/src/community/flatten_dendrogram.cuh +++ b/cpp/src/community/flatten_dendrogram.cuh @@ -15,7 
+15,7 @@ */ #pragma once -#include +#include #include #include @@ -31,23 +31,28 @@ void partition_at_level(raft::handle_t const &handle, size_t level) { vertex_t local_num_verts = dendrogram.get_level_size_nocheck(0); + rmm::device_uvector local_vertex_ids_v(local_num_verts, handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - d_vertex_ids, - d_vertex_ids + local_num_verts, - d_partition); - - std::for_each(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(level), - [&handle, &dendrogram, d_vertex_ids, &d_partition, local_num_verts](size_t l) { - cugraph::experimental::relabel( - handle, - std::tuple( - d_vertex_ids, dendrogram.get_level_ptr_nocheck(l)), - dendrogram.get_level_size_nocheck(l), - d_partition, - local_num_verts); - }); + raft::copy(d_partition, d_vertex_ids, local_num_verts, handle.get_stream()); + + std::for_each( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(level), + [&handle, &dendrogram, &local_vertex_ids_v, d_vertex_ids, &d_partition, local_num_verts]( + size_t l) { + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + local_vertex_ids_v.begin(), + local_vertex_ids_v.begin() + dendrogram.get_level_size_nocheck(l), + dendrogram.get_level_first_index_nocheck(l)); + + cugraph::experimental::relabel( + handle, + std::tuple(local_vertex_ids_v.data(), + dendrogram.get_level_ptr_nocheck(l)), + dendrogram.get_level_size_nocheck(l), + d_partition, + local_num_verts); + }); } } // namespace cugraph diff --git a/cpp/src/community/leiden.cuh b/cpp/src/community/leiden.cuh index 141f8beac40..aae2d3712b5 100644 --- a/cpp/src/community/leiden.cuh +++ b/cpp/src/community/leiden.cuh @@ -132,7 +132,7 @@ class Leiden : public Louvain { // // Initialize every cluster to reference each vertex to itself // - this->dendrogram_->add_level(current_graph.number_of_vertices); + this->dendrogram_->add_level(0, current_graph.number_of_vertices); thrust::sequence(rmm::exec_policy(this->stream_)->on(this->stream_), this->dendrogram_->current_level_begin(), diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index a851777ad93..2affcf29805 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -26,50 +26,28 @@ namespace cugraph { namespace detail { template -std::pair louvain(raft::handle_t const &handle, - GraphCSRView const &graph_view, - vertex_t *clustering, - size_t max_level, - weight_t resolution) +std::pair>, weight_t> louvain( + raft::handle_t const &handle, + GraphCSRView const &graph_view, + size_t max_level, + weight_t resolution) { CUGRAPH_EXPECTS(graph_view.edge_data != nullptr, "Invalid input argument: louvain expects a weighted graph"); - CUGRAPH_EXPECTS(clustering != nullptr, - "Invalid input argument: clustering is null, should be a device pointer to " - "memory for storing the result"); Louvain> runner(handle, graph_view); weight_t wt = runner(max_level, resolution); - rmm::device_uvector vertex_ids_v(graph_view.number_of_vertices, handle.get_stream()); - - thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_ids_v.begin(), - vertex_ids_v.end(), - vertex_t{0}); - - partition_at_level(handle, - runner.get_dendrogram(), - vertex_ids_v.data(), - clustering, - runner.get_dendrogram().num_levels()); - - // FIXME: Consider returning the Dendrogram at some point - return std::make_pair(runner.get_dendrogram().num_levels(), wt); + return std::make_pair(runner.move_dendrogram(), wt); } 
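To make the Dendrogram / flatten_dendrogram split introduced by this PR concrete: each dendrogram level stores a vertex-to-cluster assignment for the (coarsened) graph at that level, and flattening is simply the composition of those assignments, which `partition_at_level` above performs with `relabel`. A toy NumPy illustration of the same composition (hypothetical arrays, not the cuGraph API):

    import numpy as np

    # Hypothetical two-level dendrogram for a 6-vertex graph:
    #   level 0 assigns each vertex to one of 3 communities,
    #   level 1 assigns each level-0 community to one of 2 communities.
    level0 = np.array([0, 0, 1, 1, 2, 2])
    level1 = np.array([0, 0, 1])

    # Flattening composes the per-level maps, as partition_at_level does via relabel.
    flat = level1[level0]
    print(flat)  # [0 0 0 0 1 1]

At the Python layer the flattened labels are what `cugraph.louvain` already hands back (a 'vertex'/'partition' DataFrame plus the modularity score), so existing callers should be unaffected; the split mainly lets C++ callers keep the intermediate dendrogram.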
template -std::pair louvain( +std::pair>, weight_t> louvain( raft::handle_t const &handle, experimental::graph_view_t const &graph_view, - vertex_t *clustering, size_t max_level, weight_t resolution) { - CUGRAPH_EXPECTS(clustering != nullptr, - "Invalid input argument: clustering is null, should be a device pointer to " - "memory for storing the result"); - // "FIXME": remove this check and the guards below // // Disable louvain(experimental::graph_view_t,...) @@ -87,40 +65,153 @@ std::pair louvain( weight_t wt = runner(max_level, resolution); - rmm::device_uvector vertex_ids_v(graph_view.get_number_of_vertices(), - handle.get_stream()); + return std::make_pair(runner.move_dendrogram(), wt); + } +} - thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_ids_v.begin(), - vertex_ids_v.end(), - graph_view.get_local_vertex_first()); +template +void flatten_dendrogram(raft::handle_t const &handle, + GraphCSRView const &graph_view, + Dendrogram const &dendrogram, + vertex_t *clustering) +{ + rmm::device_uvector vertex_ids_v(graph_view.number_of_vertices, handle.get_stream()); - partition_at_level(handle, - runner.get_dendrogram(), - vertex_ids_v.data(), - clustering, - runner.get_dendrogram().num_levels()); + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_ids_v.begin(), + vertex_ids_v.end(), + vertex_t{0}); - // FIXME: Consider returning the Dendrogram at some point - return std::make_pair(runner.get_dendrogram().num_levels(), wt); - } + partition_at_level( + handle, dendrogram, vertex_ids_v.data(), clustering, dendrogram.num_levels()); +} + +template +void flatten_dendrogram( + raft::handle_t const &handle, + experimental::graph_view_t const &graph_view, + Dendrogram const &dendrogram, + vertex_t *clustering) +{ + rmm::device_uvector vertex_ids_v(graph_view.get_number_of_vertices(), + handle.get_stream()); + + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_ids_v.begin(), + vertex_ids_v.end(), + graph_view.get_local_vertex_first()); + + partition_at_level( + handle, dendrogram, vertex_ids_v.data(), clustering, dendrogram.num_levels()); } } // namespace detail -template -std::pair louvain(raft::handle_t const &handle, - graph_t const &graph, - typename graph_t::vertex_type *clustering, - size_t max_level, - typename graph_t::weight_type resolution) +template +std::pair>, + typename graph_view_t::weight_type> +louvain(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t max_level, + typename graph_view_t::weight_type resolution) +{ + return detail::louvain(handle, graph_view, max_level, resolution); +} + +template +void flatten_dendrogram(raft::handle_t const &handle, + graph_view_t const &graph_view, + Dendrogram const &dendrogram, + typename graph_view_t::vertex_type *clustering) { + detail::flatten_dendrogram(handle, graph_view, dendrogram, clustering); +} + +template +std::pair louvain( + raft::handle_t const &handle, + graph_view_t const &graph_view, + typename graph_view_t::vertex_type *clustering, + size_t max_level, + typename graph_view_t::weight_type resolution) +{ + using vertex_t = typename graph_view_t::vertex_type; + using weight_t = typename graph_view_t::weight_type; + CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); - return detail::louvain(handle, graph, clustering, max_level, resolution); + std::unique_ptr> dendrogram; + weight_t modularity; + + std::tie(dendrogram, modularity) = louvain(handle, 
graph_view, max_level, resolution); + + flatten_dendrogram(handle, graph_view, *dendrogram, clustering); + + return std::make_pair(dendrogram->num_levels(), modularity); } // Explicit template instantations +template std::pair>, float> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + float); +template std::pair>, float> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + float); +template std::pair>, float> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + float); +template std::pair>, double> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + double); +template std::pair>, double> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + double); +template std::pair>, double> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + double); +template std::pair>, float> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + float); +template std::pair>, float> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + float); +template std::pair>, float> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + float); +template std::pair>, double> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + double); +template std::pair>, double> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + double); +template std::pair>, double> louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + size_t, + double); + template std::pair louvain( raft::handle_t const &, GraphCSRView const &, int32_t *, size_t, float); template std::pair louvain(raft::handle_t const &, diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index e28f0f1746d..0862bbc62a9 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include #include @@ -138,9 +138,11 @@ class Louvain { return Q; } - Dendrogram &get_dendrogram() const { return *dendrogram_; } + Dendrogram const &get_dendrogram() const { return *dendrogram_; } - std::unique_ptr> move_dendrogram() { return dendrogram_; } + Dendrogram &get_dendrogram() { return *dendrogram_; } + + std::unique_ptr> move_dendrogram() { return std::move(dendrogram_); } virtual weight_t operator()(size_t max_level, weight_t resolution) { @@ -208,7 +210,7 @@ class Louvain { virtual void initialize_dendrogram_level(vertex_t num_vertices) { - dendrogram_->add_level(num_vertices); + dendrogram_->add_level(0, num_vertices); thrust::sequence(rmm::exec_policy(stream_)->on(stream_), dendrogram_->current_level_begin(), diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index fe8310a62ca..3136515faa6 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -15,28 +15,21 @@ */ #pragma once -#include +#include #include - -#include -#include -#include -#include - -#include - #include + #include #include +#include +#include #include #include +#include -#include - -#include - -#include +#include +#include //#define TIMING @@ -47,343 +40,6 @@ namespace cugraph { namespace experimental { -namespace detail { - -#ifdef CUCO_STATIC_MAP_DEFINED -template -struct create_cuco_pair_t { - cuco::pair_type __device__ operator()(data_t data) - { - cuco::pair_type tmp; - tmp.first = data; - tmp.second = data_t{0}; 
- return tmp; - } -}; -#endif - -// -// These classes should allow cuco::static_map to generate hash tables of -// different configurations. -// - -// -// Compare edges based on src[e] and dst[e] matching -// -template -class src_dst_equality_comparator_t { - public: - src_dst_equality_comparator_t(rmm::device_vector const &src, - rmm::device_vector const &dst, - sentinel_t sentinel_value) - : d_src_{src.data().get()}, d_dst_{dst.data().get()}, sentinel_value_(sentinel_value) - { - } - - src_dst_equality_comparator_t(data_t const *d_src, data_t const *d_dst, sentinel_t sentinel_value) - : d_src_{d_src}, d_dst_{d_dst}, sentinel_value_(sentinel_value) - { - } - - template - __device__ bool operator()(idx_type lhs_index, idx_type rhs_index) const noexcept - { - return (lhs_index != sentinel_value_) && (rhs_index != sentinel_value_) && - (d_src_[lhs_index] == d_src_[rhs_index]) && (d_dst_[lhs_index] == d_dst_[rhs_index]); - } - - private: - data_t const *d_src_; - data_t const *d_dst_; - sentinel_t sentinel_value_; -}; - -// -// Hash edges based src[e] and dst[e] -// -template -class src_dst_hasher_t { - public: - src_dst_hasher_t(rmm::device_vector const &src, rmm::device_vector const &dst) - : d_src_{src.data().get()}, d_dst_{dst.data().get()} - { - } - - src_dst_hasher_t(data_t const *d_src, data_t const *d_dst) : d_src_{d_src}, d_dst_{d_dst} {} - - template - __device__ auto operator()(idx_type index) const - { - cuco::detail::MurmurHash3_32 hasher; - - auto h_src = hasher(d_src_[index]); - auto h_dst = hasher(d_dst_[index]); - - /* - * Combine the source hash and the dest hash into a single hash value - * - * Taken from the Boost hash_combine function - * https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html - */ - h_src ^= h_dst + 0x9e3779b9 + (h_src << 6) + (h_src >> 2); - - return h_src; - } - - private: - data_t const *d_src_; - data_t const *d_dst_; -}; - -// -// Compare edges based on src[e] and cluster[dst[e]] matching -// -template -class src_cluster_equality_comparator_t { - public: - src_cluster_equality_comparator_t(rmm::device_vector const &src, - rmm::device_vector const &dst, - rmm::device_vector const &dst_cluster_cache, - data_t base_dst_id, - sentinel_t sentinel_value) - : d_src_{src.data().get()}, - d_dst_{dst.data().get()}, - d_dst_cluster_{dst_cluster_cache.data().get()}, - base_dst_id_(base_dst_id), - sentinel_value_(sentinel_value) - { - } - - src_cluster_equality_comparator_t(data_t const *d_src, - data_t const *d_dst, - data_t const *d_dst_cluster_cache, - data_t base_dst_id, - sentinel_t sentinel_value) - : d_src_{d_src}, - d_dst_{d_dst}, - d_dst_cluster_{d_dst_cluster_cache}, - base_dst_id_(base_dst_id), - sentinel_value_(sentinel_value) - { - } - - __device__ bool operator()(sentinel_t lhs_index, sentinel_t rhs_index) const noexcept - { - return (lhs_index != sentinel_value_) && (rhs_index != sentinel_value_) && - (d_src_[lhs_index] == d_src_[rhs_index]) && - (d_dst_cluster_[d_dst_[lhs_index] - base_dst_id_] == - d_dst_cluster_[d_dst_[rhs_index] - base_dst_id_]); - } - - private: - data_t const *d_src_; - data_t const *d_dst_; - data_t const *d_dst_cluster_; - data_t base_dst_id_; - sentinel_t sentinel_value_; -}; - -// -// Hash edges based src[e] and cluster[dst[e]] -// -template -class src_cluster_hasher_t { - public: - src_cluster_hasher_t(rmm::device_vector const &src, - rmm::device_vector const &dst, - rmm::device_vector const &dst_cluster_cache, - data_t base_dst_id) - : d_src_{src.data().get()}, - d_dst_{dst.data().get()}, - 
d_dst_cluster_{dst_cluster_cache.data().get()}, - base_dst_id_(base_dst_id) - { - } - - src_cluster_hasher_t(data_t const *d_src, - data_t const *d_dst, - data_t const *d_dst_cluster_cache, - data_t base_dst_id) - : d_src_{d_src}, d_dst_{d_dst}, d_dst_cluster_{d_dst_cluster_cache}, base_dst_id_(base_dst_id) - { - } - - template - __device__ auto operator()(idx_type index) const - { - cuco::detail::MurmurHash3_32 hasher; - - auto h_src = hasher(d_src_[index]); - auto h_cluster = hasher(d_dst_cluster_[d_dst_[index] - base_dst_id_]); - - /* - * Combine the source hash and the cluster hash into a single hash value - * - * Taken from the Boost hash_combine function - * https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html - */ - h_src ^= h_cluster + 0x9e3779b9 + (h_src << 6) + (h_src >> 2); - - return h_src; - } - - private: - data_t const *d_src_; - data_t const *d_dst_; - data_t const *d_dst_cluster_; - data_t base_dst_id_; -}; - -// -// Skip edges where src[e] == dst[e] -// -template -class skip_edge_t { - public: - skip_edge_t(rmm::device_vector const &src, rmm::device_vector const &dst) - : d_src_{src.data().get()}, d_dst_{dst.data().get()} - { - } - - skip_edge_t(data_t const *src, data_t const *dst) : d_src_{src}, d_dst_{dst} {} - - template - __device__ auto operator()(idx_type index) const - { - return d_src_[index] == d_dst_[index]; - } - - private: - data_t const *d_src_; - data_t const *d_dst_; -}; - -template -struct lookup_by_vertex_id { - public: - lookup_by_vertex_id(data_t const *d_array, vertex_t const *d_vertices, vertex_t base_vertex_id) - : d_array_(d_array), d_vertices_(d_vertices), base_vertex_id_(base_vertex_id) - { - } - - template - data_t operator() __device__(edge_t edge_id) const - { - return d_array_[d_vertices_[edge_id] - base_vertex_id_]; - } - - private: - data_t const *d_array_; - vertex_t const *d_vertices_; - vertex_t base_vertex_id_; -}; - -template -vector_t remove_elements_from_vector(vector_t const &input_v, - iterator_t iterator_begin, - iterator_t iterator_end, - function_t function, - cudaStream_t stream) -{ - vector_t temp_v(input_v.size()); - - auto last = thrust::copy_if( - rmm::exec_policy(stream)->on(stream), iterator_begin, iterator_end, temp_v.begin(), function); - - temp_v.resize(thrust::distance(temp_v.begin(), last)); - - return temp_v; -} - -template -vector_t remove_elements_from_vector(vector_t const &input_v, - function_t function, - cudaStream_t stream) -{ - return remove_elements_from_vector(input_v, input_v.begin(), input_v.end(), function, stream); -} - -// FIXME: This should be a generic utility. 
The one in cython.cu -// is very close to this -template * = nullptr> -std::unique_ptr> -create_graph(raft::handle_t const &handle, - rmm::device_vector const &src_v, - rmm::device_vector const &dst_v, - rmm::device_vector const &weight_v, - std::size_t num_local_verts, - experimental::graph_properties_t graph_props, - view_t const &view) -{ - std::vector> edgelist( - {{src_v.data().get(), - dst_v.data().get(), - weight_v.data().get(), - static_cast(src_v.size())}}); - - return std::make_unique>( - handle, - edgelist, - view.get_partition(), - num_local_verts, - src_v.size(), - graph_props, - false, - false); -} - -template * = nullptr> -std::unique_ptr> -create_graph(raft::handle_t const &handle, - rmm::device_vector const &src_v, - rmm::device_vector const &dst_v, - rmm::device_vector const &weight_v, - std::size_t num_local_verts, - experimental::graph_properties_t graph_props, - view_t const &view) -{ - experimental::edgelist_t edgelist{ - src_v.data().get(), - dst_v.data().get(), - weight_v.data().get(), - static_cast(src_v.size())}; - - return std::make_unique>( - handle, edgelist, num_local_verts, graph_props, false, false); -} - -} // namespace detail - -// -// FIXME: Ultimately, this would be cleaner and more efficient if we did the following: -// -// 1) Create an object that does a single level Louvain computation on an input graph -// (no graph contraction) -// 2) Create an object that does graph contraction -// 3) Create Louvain to use these objects in sequence to compute the aggregate result. -// -// In MNMG-world, the graph contraction step is going to create another graph that likely -// fits efficiently in a smaller number of GPUs (eventually one). Decomposing the algorithm -// as above would allow us to eventually run the single GPU version of single level Louvain -// on the contracted graphs - which should be more efficient. -// -// FIXME: We should return the dendrogram and let the python layer clean it up (or have a -// separate C++ function to flatten the dendrogram). 
There are customers that might -// like the dendrogram and the implementation would be a bit cleaner if we did the -// collapsing as a separate step -// template class Louvain { public: @@ -405,67 +61,31 @@ class Louvain { handle_(handle), dendrogram_(std::make_unique>()), current_graph_view_(graph_view), - compute_partition_(handle, graph_view), - local_num_vertices_(graph_view.get_number_of_local_vertices()), - local_num_rows_(graph_view.get_number_of_local_adj_matrix_partition_rows()), - local_num_cols_(graph_view.get_number_of_local_adj_matrix_partition_cols()), - local_num_edges_(graph_view.get_number_of_edges()), - vertex_weights_v_(graph_view.get_number_of_local_vertices()), - cluster_weights_v_(graph_view.get_number_of_local_vertices()), - number_of_vertices_(graph_view.get_number_of_local_vertices()), - stream_(handle.get_stream()) + cluster_keys_v_(graph_view.get_number_of_local_vertices(), handle.get_stream()), + cluster_weights_v_(graph_view.get_number_of_local_vertices(), handle.get_stream()), + vertex_weights_v_(graph_view.get_number_of_local_vertices(), handle.get_stream()), + src_vertex_weights_cache_v_(0, handle.get_stream()), + src_cluster_cache_v_(0, handle.get_stream()), + dst_cluster_cache_v_(0, handle.get_stream()) { - if (graph_view_t::is_multi_gpu) { - rank_ = handle.get_comms().get_rank(); - base_vertex_id_ = graph_view.get_local_vertex_first(); - base_src_vertex_id_ = graph_view.get_local_adj_matrix_partition_row_first(0); - base_dst_vertex_id_ = graph_view.get_local_adj_matrix_partition_col_first(0); - - local_num_edges_ = thrust::transform_reduce( - thrust::host, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator( - graph_view.get_number_of_local_adj_matrix_partitions()), - [&graph_view](auto indx) { - return graph_view.get_number_of_local_adj_matrix_partition_edges(indx); - }, - size_t{0}, - thrust::plus()); - - CUDA_TRY(cudaStreamSynchronize(stream_)); - } - - src_indices_v_.resize(local_num_edges_); - - cugraph::detail::offsets_to_indices( - current_graph_view_.offsets(), local_num_rows_, src_indices_v_.data().get()); - - if (base_src_vertex_id_ > 0) { - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - src_indices_v_.begin(), - src_indices_v_.end(), - thrust::make_constant_iterator(base_src_vertex_id_), - src_indices_v_.begin(), - thrust::plus()); - } } - Dendrogram &get_dendrogram() const { return *dendrogram_; } + Dendrogram const &get_dendrogram() const { return *dendrogram_; } + + Dendrogram &get_dendrogram() { return *dendrogram_; } - std::unique_ptr> move_dendrogram() { return dendrogram_; } + std::unique_ptr> move_dendrogram() { return std::move(dendrogram_); } virtual weight_t operator()(size_t max_level, weight_t resolution) { weight_t best_modularity = weight_t{-1}; -#ifdef CUCO_STATIC_MAP_DEFINED - weight_t total_edge_weight; - total_edge_weight = experimental::transform_reduce_e( + weight_t total_edge_weight = experimental::transform_reduce_e( handle_, current_graph_view_, thrust::make_constant_iterator(0), thrust::make_constant_iterator(0), - [] __device__(auto, auto, weight_t wt, auto, auto) { return wt; }, + [] __device__(auto src, auto dst, weight_t wt, auto, auto) { return wt; }, weight_t{0}); while (dendrogram_->num_levels() < max_level) { @@ -486,7 +106,6 @@ class Louvain { } timer_display(std::cout); -#endif return best_modularity; } @@ -495,14 +114,23 @@ class Louvain { void timer_start(std::string const ®ion) { #ifdef TIMING - if (rank_ == 0) hr_timer_.start(region); + if (graph_view_t::is_multi_gpu) { + if 
(handle.get_comms().get_rank() == 0) hr_timer_.start(region); + } else { + hr_timer_.start(region); + } #endif } void timer_stop(cudaStream_t stream) { #ifdef TIMING - if (rank_ == 0) { + if (graph_view_t::is_multi_gpu) { + if (handle.get_comms().get_rank() == 0) { + CUDA_TRY(cudaStreamSynchronize(stream)); + hr_timer_.stop(); + } + } else { CUDA_TRY(cudaStreamSynchronize(stream)); hr_timer_.stop(); } @@ -512,36 +140,46 @@ class Louvain { void timer_display(std::ostream &os) { #ifdef TIMING - if (rank_ == 0) hr_timer_.display(os); + if (graph_view_t::is_multi_gpu) { + if (handle.get_comms().get_rank() == 0) hr_timer_.display(os); + } else { + hr_timer_.display(os); + } #endif } protected: void initialize_dendrogram_level(vertex_t num_vertices) { - dendrogram_->add_level(num_vertices); + dendrogram_->add_level(current_graph_view_.get_local_vertex_first(), num_vertices); - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), + thrust::sequence(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), dendrogram_->current_level_begin(), dendrogram_->current_level_end(), - base_vertex_id_); + current_graph_view_.get_local_vertex_first()); } public: weight_t modularity(weight_t total_edge_weight, weight_t resolution) { - weight_t sum_degree_squared = experimental::transform_reduce_v( - handle_, - current_graph_view_, + weight_t sum_degree_squared = thrust::transform_reduce( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), cluster_weights_v_.begin(), + cluster_weights_v_.end(), [] __device__(weight_t p) { return p * p; }, - weight_t{0}); + weight_t{0}, + thrust::plus()); + + if (graph_t::is_multi_gpu) { + sum_degree_squared = + host_scalar_allreduce(handle_.get_comms(), sum_degree_squared, handle_.get_stream()); + } weight_t sum_internal = experimental::transform_reduce_e( handle_, current_graph_view_, - src_cluster_cache_v_.begin(), - dst_cluster_cache_v_.begin(), + d_src_cluster_cache_, + d_dst_cluster_cache_, [] __device__(auto src, auto dst, weight_t wt, auto src_cluster, auto nbr_cluster) { if (src_cluster == nbr_cluster) { return wt; @@ -561,58 +199,86 @@ class Louvain { { timer_start("compute_vertex_and_cluster_weights"); - experimental::copy_v_transform_reduce_out_nbr( - handle_, - current_graph_view_, - thrust::make_constant_iterator(0), - thrust::make_constant_iterator(0), - [] __device__(auto src, auto, auto wt, auto, auto) { return wt; }, - weight_t{0}, - vertex_weights_v_.begin()); + vertex_weights_v_ = current_graph_view_.compute_out_weight_sums(handle_); - thrust::copy(rmm::exec_policy(stream_)->on(stream_), - vertex_weights_v_.begin(), - vertex_weights_v_.end(), - cluster_weights_v_.begin()); + thrust::sequence(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + cluster_keys_v_.begin(), + cluster_keys_v_.end(), + current_graph_view_.get_local_vertex_first()); - cache_vertex_properties( - vertex_weights_v_.begin(), src_vertex_weights_cache_v_, dst_vertex_weights_cache_v_); + raft::copy(cluster_weights_v_.begin(), + vertex_weights_v_.begin(), + vertex_weights_v_.size(), + handle_.get_stream()); - cache_vertex_properties( - cluster_weights_v_.begin(), src_cluster_weights_cache_v_, dst_cluster_weights_cache_v_); + d_src_vertex_weights_cache_ = + cache_src_vertex_properties(vertex_weights_v_, src_vertex_weights_cache_v_); + + if (graph_view_t::is_multi_gpu) { + auto const comm_size = handle_.get_comms().get_size(); + rmm::device_uvector rx_keys_v(0, handle_.get_stream()); + rmm::device_uvector rx_weights_v(0, handle_.get_stream()); + + 
auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(cluster_keys_v_.begin(), cluster_weights_v_.begin())); + + std::forward_as_tuple(std::tie(rx_keys_v, rx_weights_v), std::ignore) = + groupby_gpuid_and_shuffle_values( + handle_.get_comms(), + pair_first, + pair_first + current_graph_view_.get_number_of_local_vertices(), + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_vertex_t{ + comm_size}] __device__(auto val) { return key_func(thrust::get<0>(val)); }, + handle_.get_stream()); + + cluster_keys_v_ = std::move(rx_keys_v); + cluster_weights_v_ = std::move(rx_weights_v); + } - timer_stop(stream_); + timer_stop(handle_.get_stream()); } - template - void cache_vertex_properties(iterator_t const &local_input_iterator, - rmm::device_vector &src_cache_v, - rmm::device_vector &dst_cache_v, - bool src = true, - bool dst = true) + template + T *cache_src_vertex_properties(rmm::device_uvector &input, rmm::device_uvector &src_cache_v) { - if (src) { - src_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_rows()); - copy_to_adj_matrix_row( - handle_, current_graph_view_, local_input_iterator, src_cache_v.begin()); + if (graph_view_t::is_multi_gpu) { + src_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_rows(), + handle_.get_stream()); + copy_to_adj_matrix_row(handle_, current_graph_view_, input.begin(), src_cache_v.begin()); + return src_cache_v.begin(); + } else { + return input.begin(); } + } - if (dst) { - dst_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_cols()); - copy_to_adj_matrix_col( - handle_, current_graph_view_, local_input_iterator, dst_cache_v.begin()); + template + T *cache_dst_vertex_properties(rmm::device_uvector &input, rmm::device_uvector &dst_cache_v) + { + if (graph_view_t::is_multi_gpu) { + dst_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_cols(), + handle_.get_stream()); + copy_to_adj_matrix_col(handle_, current_graph_view_, input.begin(), dst_cache_v.begin()); + return dst_cache_v.begin(); + } else { + return input.begin(); } } -#ifdef CUCO_STATIC_MAP_DEFINED virtual weight_t update_clustering(weight_t total_edge_weight, weight_t resolution) { timer_start("update_clustering"); - rmm::device_vector next_cluster_v(dendrogram_->current_level_begin(), - dendrogram_->current_level_end()); + rmm::device_uvector next_cluster_v(dendrogram_->current_level_size(), + handle_.get_stream()); - cache_vertex_properties(next_cluster_v.begin(), src_cluster_cache_v_, dst_cluster_cache_v_); + raft::copy(next_cluster_v.begin(), + dendrogram_->current_level_begin(), + dendrogram_->current_level_size(), + handle_.get_stream()); + + d_src_cluster_cache_ = cache_src_vertex_properties(next_cluster_v, src_cluster_cache_v_); + d_dst_cluster_cache_ = cache_dst_vertex_properties(next_cluster_v, dst_cluster_cache_v_); weight_t new_Q = modularity(total_edge_weight, resolution); weight_t cur_Q = new_Q - 1; @@ -629,691 +295,271 @@ class Louvain { up_down = !up_down; - cache_vertex_properties(next_cluster_v.begin(), src_cluster_cache_v_, dst_cluster_cache_v_); - new_Q = modularity(total_edge_weight, resolution); if (new_Q > cur_Q) { - thrust::copy(rmm::exec_policy(stream_)->on(stream_), - next_cluster_v.begin(), - next_cluster_v.end(), - dendrogram_->current_level_begin()); + raft::copy(dendrogram_->current_level_begin(), + next_cluster_v.begin(), + next_cluster_v.size(), + handle_.get_stream()); } } - // cache the final clustering locally on each cpu - 
cache_vertex_properties( - dendrogram_->current_level_begin(), src_cluster_cache_v_, dst_cluster_cache_v_); - - timer_stop(stream_); + timer_stop(handle_.get_stream()); return cur_Q; } - void update_by_delta_modularity(weight_t total_edge_weight, - weight_t resolution, - rmm::device_vector &next_cluster_v, - bool up_down) + void compute_cluster_sum_and_subtract(rmm::device_uvector &old_cluster_sum_v, + rmm::device_uvector &cluster_subtract_v) { - rmm::device_vector old_cluster_sum_v(local_num_vertices_); - rmm::device_vector src_old_cluster_sum_cache_v; + auto output_buffer = + cugraph::experimental::allocate_dataframe_buffer>( + current_graph_view_.get_number_of_local_vertices(), handle_.get_stream()); experimental::copy_v_transform_reduce_out_nbr( handle_, current_graph_view_, - src_cluster_cache_v_.begin(), - dst_cluster_cache_v_.begin(), + d_src_cluster_cache_, + d_dst_cluster_cache_, [] __device__(auto src, auto dst, auto wt, auto src_cluster, auto nbr_cluster) { - if ((src != dst) && (src_cluster == nbr_cluster)) { - return wt; - } else - return weight_t{0}; - }, - weight_t{0}, - old_cluster_sum_v.begin()); - - cache_vertex_properties( - old_cluster_sum_v.begin(), src_old_cluster_sum_cache_v, empty_cache_weight_v_, true, false); - - detail::src_cluster_equality_comparator_t compare( - src_indices_v_.data().get(), - current_graph_view_.indices(), - dst_cluster_cache_v_.data().get(), - base_dst_vertex_id_, - std::numeric_limits::max()); - detail::src_cluster_hasher_t hasher(src_indices_v_.data().get(), - current_graph_view_.indices(), - dst_cluster_cache_v_.data().get(), - base_dst_vertex_id_); - detail::skip_edge_t skip_edge(src_indices_v_.data().get(), - current_graph_view_.indices()); - - // - // Group edges that lead from same source to same neighboring cluster together - // local_cluster_edge_ids_v will contain edge ids of unique pairs of (src,nbr_cluster). - // If multiple edges exist, one edge id will be chosen (by a parallel race). - // nbr_weights_v will contain the combined weight of all of the edges that connect - // that pair. - // - rmm::device_vector local_cluster_edge_ids_v; - rmm::device_vector nbr_weights_v; - - // - // Perform this combining on the local edges - // - std::tie(local_cluster_edge_ids_v, nbr_weights_v) = combine_local_src_nbr_cluster_weights( - hasher, compare, skip_edge, current_graph_view_.weights(), local_num_edges_); - - // - // In order to compute delta_Q for a given src/nbr_cluster pair, I need the following - // information: - // src - // old_cluster - the cluster that src is currently assigned to - // nbr_cluster - // sum of edges going to new cluster - // vertex weight of the src vertex - // sum of edges going to old cluster - // cluster_weights of old cluster - // cluster_weights of nbr_cluster - // - // Each GPU has locally cached: - // The sum of edges going to the old cluster (computed from - // experimental::copy_v_transform_reduce_out_nbr call above. - // old_cluster - // nbr_cluster - // vertex weight of src vertex - // partial sum of edges going to the new cluster (in nbr_weights) - // - // So the plan is to take the tuple: - // (src, old_cluster, src_vertex_weight, old_cluster_sum, nbr_cluster, nbr_weights) - // and shuffle it around the cluster so that they arrive at the GPU where the pair - // (old_cluster, new_cluster) would be assigned. Then we can aggregate this information - // and compute the delta_Q values. 
- // - - // - // Define the communication pattern, we're going to send detail - // for edge i to the GPU that is responsible for the vertex - // pair (cluster[src[i]], cluster[dst[i]]) - // - auto communication_schedule = thrust::make_transform_iterator( - local_cluster_edge_ids_v.begin(), - [d_edge_device_view = compute_partition_.edge_device_view(), - d_src_indices = src_indices_v_.data().get(), - d_src_cluster = src_cluster_cache_v_.data().get(), - d_dst_indices = current_graph_view_.indices(), - d_dst_cluster = dst_cluster_cache_v_.data().get(), - base_src_vertex_id = base_src_vertex_id_, - base_dst_vertex_id = base_dst_vertex_id_] __device__(edge_t edge_id) { - return d_edge_device_view(d_src_cluster[d_src_indices[edge_id] - base_src_vertex_id], - d_dst_cluster[d_dst_indices[edge_id] - base_dst_vertex_id]); - }); + weight_t subtract{0}; + weight_t sum{0}; - // FIXME: This should really be a variable_shuffle of a tuple, for time - // reasons I'm just doing 6 independent shuffles. - // - rmm::device_vector ocs_v = variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_transform_iterator( - local_cluster_edge_ids_v.begin(), - detail::lookup_by_vertex_id(src_old_cluster_sum_cache_v.data().get(), - src_indices_v_.data().get(), - base_src_vertex_id_)), - communication_schedule); - - rmm::device_vector src_cluster_v = - variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_transform_iterator( - local_cluster_edge_ids_v.begin(), - detail::lookup_by_vertex_id( - src_cluster_cache_v_.data().get(), src_indices_v_.data().get(), base_src_vertex_id_)), - communication_schedule); - - rmm::device_vector src_vertex_weight_v = - variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_transform_iterator( - local_cluster_edge_ids_v.begin(), - detail::lookup_by_vertex_id(src_vertex_weights_cache_v_.data().get(), - src_indices_v_.data().get(), - base_src_vertex_id_)), - communication_schedule); - - rmm::device_vector src_v = variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_permutation_iterator(src_indices_v_.begin(), local_cluster_edge_ids_v.begin()), - communication_schedule); + if (src == dst) + subtract = wt; + else if (src_cluster == nbr_cluster) + sum = wt; - rmm::device_vector nbr_cluster_v = - variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_transform_iterator( - local_cluster_edge_ids_v.begin(), - detail::lookup_by_vertex_id( - dst_cluster_cache_v_.data().get(), current_graph_view_.indices(), base_dst_vertex_id_)), - communication_schedule); - - nbr_weights_v = variable_shuffle( - handle_, nbr_weights_v.size(), nbr_weights_v.begin(), communication_schedule); - - // - // At this point, src_v, nbr_cluster_v and nbr_weights_v have been - // shuffled to the correct GPU. We can now compute the final - // value of delta_Q for each neigboring cluster - // - // Again, we'll combine edges that connect the same source to the same - // neighboring cluster and sum their weights. 
- // - detail::src_dst_equality_comparator_t compare2( - src_v, nbr_cluster_v, std::numeric_limits::max()); - detail::src_dst_hasher_t hasher2(src_v, nbr_cluster_v); - - auto skip_edge2 = [] __device__(auto) { return false; }; - - std::tie(local_cluster_edge_ids_v, nbr_weights_v) = combine_local_src_nbr_cluster_weights( - hasher2, compare2, skip_edge2, nbr_weights_v.data().get(), src_v.size()); - - // - // Now local_cluster_edge_ids_v contains the edge ids of the src id/dest - // cluster id pairs, and nbr_weights_v contains the weight of edges - // going to that cluster id - // - // Now we can compute (locally) each delta_Q value - // - auto iter = thrust::make_zip_iterator( - thrust::make_tuple(local_cluster_edge_ids_v.begin(), nbr_weights_v.begin())); - - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - iter, - iter + local_cluster_edge_ids_v.size(), - nbr_weights_v.begin(), - [total_edge_weight, - resolution, - d_src = src_v.data().get(), - d_src_cluster = src_cluster_v.data().get(), - d_nbr_cluster = nbr_cluster_v.data().get(), - d_src_vertex_weights = src_vertex_weight_v.data().get(), - d_src_cluster_weights = src_cluster_weights_cache_v_.data().get(), - d_dst_cluster_weights = dst_cluster_weights_cache_v_.data().get(), - d_ocs = ocs_v.data().get(), - base_src_vertex_id = base_src_vertex_id_, - base_dst_vertex_id = base_dst_vertex_id_] __device__(auto tuple) { - edge_t edge_id = thrust::get<0>(tuple); - vertex_t nbr_cluster = d_nbr_cluster[edge_id]; - weight_t new_cluster_sum = thrust::get<1>(tuple); - vertex_t old_cluster = d_src_cluster[edge_id]; - weight_t k_k = d_src_vertex_weights[edge_id]; - weight_t old_cluster_sum = d_ocs[edge_id]; - - weight_t a_old = d_src_cluster_weights[old_cluster - base_src_vertex_id]; - weight_t a_new = d_dst_cluster_weights[nbr_cluster - base_dst_vertex_id]; - - return 2 * (((new_cluster_sum - old_cluster_sum) / total_edge_weight) - - resolution * (a_new * k_k - a_old * k_k + k_k * k_k) / - (total_edge_weight * total_edge_weight)); - }); - - // - // Pick the largest delta_Q value for each vertex on this gpu. 
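Written out, the expression in the removed kernel above (and, per the commit message, the quantity the reimplemented `update_by_delta_modularity` still targets) is the resolution-weighted modularity gain. With total edge weight m, resolution gamma, weighted degree k_i of the vertex being moved, weights k_{i,old} / k_{i,new} from that vertex into its current and candidate clusters (old_cluster_sum / new_cluster_sum in the code), and cluster weights a_{old} / a_{new}, the code computes

\[
\Delta Q = 2 \left( \frac{k_{i,new} - k_{i,old}}{m}
         - \gamma \, \frac{k_i \left( a_{new} - a_{old} + k_i \right)}{m^{2}} \right)
\]

while the modularity tracked by the `modularity()` helper earlier in this file takes the standard form

\[
Q = \frac{\Sigma_{in}}{m} - \gamma \, \frac{\sum_{c} a_c^{2}}{m^{2}}
\]

with \Sigma_{in} the total weight of intra-cluster edges (sum_internal) and a_c the per-cluster weights (cluster_weights_v_).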
- // Then we will shuffle back to the gpu by vertex id - // - rmm::device_vector final_src_v(local_cluster_edge_ids_v.size()); - rmm::device_vector final_nbr_cluster_v(local_cluster_edge_ids_v.size()); - rmm::device_vector final_nbr_weights_v(local_cluster_edge_ids_v.size()); - - auto final_input_iter = thrust::make_zip_iterator(thrust::make_tuple( - thrust::make_permutation_iterator(src_v.begin(), local_cluster_edge_ids_v.begin()), - thrust::make_permutation_iterator(nbr_cluster_v.begin(), local_cluster_edge_ids_v.begin()), - nbr_weights_v.begin())); - - auto final_output_iter = thrust::make_zip_iterator(thrust::make_tuple( - final_src_v.begin(), final_nbr_cluster_v.begin(), final_nbr_weights_v.begin())); - - auto final_output_pos = - thrust::copy_if(rmm::exec_policy(stream_)->on(stream_), - final_input_iter, - final_input_iter + local_cluster_edge_ids_v.size(), - final_output_iter, - [] __device__(auto p) { return (thrust::get<2>(p) > weight_t{0}); }); - - final_src_v.resize(thrust::distance(final_output_iter, final_output_pos)); - final_nbr_cluster_v.resize(thrust::distance(final_output_iter, final_output_pos)); - final_nbr_weights_v.resize(thrust::distance(final_output_iter, final_output_pos)); - - // - // Sort the results, pick the largest version - // - thrust::sort(rmm::exec_policy(stream_)->on(stream_), - thrust::make_zip_iterator(thrust::make_tuple( - final_src_v.begin(), final_nbr_weights_v.begin(), final_nbr_cluster_v.begin())), - thrust::make_zip_iterator(thrust::make_tuple( - final_src_v.end(), final_nbr_weights_v.end(), final_nbr_cluster_v.begin())), - [] __device__(auto left, auto right) { - if (thrust::get<0>(left) < thrust::get<0>(right)) return true; - if (thrust::get<0>(left) > thrust::get<0>(right)) return false; - if (thrust::get<1>(left) > thrust::get<1>(right)) return true; - if (thrust::get<1>(left) < thrust::get<1>(right)) return false; - return (thrust::get<2>(left) < thrust::get<2>(right)); - }); - - // - // Now that we're sorted the first entry for each src value is the largest. 
- // - local_cluster_edge_ids_v.resize(final_src_v.size()); - - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(final_src_v.size()), - local_cluster_edge_ids_v.begin(), - [sentinel = std::numeric_limits::max(), - d_src = final_src_v.data().get()] __device__(edge_t edge_id) { - if (edge_id == 0) { return edge_id; } - - if (d_src[edge_id - 1] != d_src[edge_id]) { return edge_id; } - - return sentinel; - }); - - local_cluster_edge_ids_v = detail::remove_elements_from_vector( - local_cluster_edge_ids_v, - [sentinel = std::numeric_limits::max()] __device__(auto edge_id) { - return (edge_id != sentinel); + return thrust::make_tuple(subtract, sum); }, - stream_); + thrust::make_tuple(weight_t{0}, weight_t{0}), + cugraph::experimental::get_dataframe_buffer_begin>( + output_buffer)); + + thrust::transform( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + cugraph::experimental::get_dataframe_buffer_begin>( + output_buffer), + cugraph::experimental::get_dataframe_buffer_begin>( + output_buffer) + + current_graph_view_.get_number_of_local_vertices(), + old_cluster_sum_v.begin(), + [] __device__(auto p) { return thrust::get<1>(p); }); + + thrust::transform( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + cugraph::experimental::get_dataframe_buffer_begin>( + output_buffer), + cugraph::experimental::get_dataframe_buffer_begin>( + output_buffer) + + current_graph_view_.get_number_of_local_vertices(), + cluster_subtract_v.begin(), + [] __device__(auto p) { return thrust::get<0>(p); }); + } - final_nbr_cluster_v = variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_permutation_iterator(final_nbr_cluster_v.begin(), - local_cluster_edge_ids_v.begin()), - thrust::make_transform_iterator( - thrust::make_permutation_iterator(final_src_v.begin(), local_cluster_edge_ids_v.begin()), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - })); - - final_nbr_weights_v = variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_permutation_iterator(final_nbr_weights_v.begin(), - local_cluster_edge_ids_v.begin()), - thrust::make_transform_iterator( - thrust::make_permutation_iterator(final_src_v.begin(), local_cluster_edge_ids_v.begin()), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - })); - - final_src_v = variable_shuffle( + void update_by_delta_modularity(weight_t total_edge_weight, + weight_t resolution, + rmm::device_uvector &next_cluster_v, + bool up_down) + { +#ifdef CUCO_STATIC_MAP_DEFINED + rmm::device_uvector old_cluster_sum_v( + current_graph_view_.get_number_of_local_vertices(), handle_.get_stream()); + rmm::device_uvector cluster_subtract_v( + current_graph_view_.get_number_of_local_vertices(), handle_.get_stream()); + rmm::device_uvector src_cluster_weights_v(next_cluster_v.size(), + handle_.get_stream()); + rmm::device_uvector dst_cluster_weights_v(next_cluster_v.size(), + handle_.get_stream()); + + compute_cluster_sum_and_subtract(old_cluster_sum_v, cluster_subtract_v); + + auto output_buffer = + cugraph::experimental::allocate_dataframe_buffer>( + current_graph_view_.get_number_of_local_vertices(), handle_.get_stream()); + + vertex_t *map_key_first; + vertex_t *map_key_last; + weight_t *map_value_first; + + if (graph_t::is_multi_gpu) { + 
cugraph::experimental::detail::compute_gpu_id_from_vertex_t vertex_to_gpu_id_op{ + handle_.get_comms().get_size()}; + + src_cluster_weights_v = cugraph::experimental::collect_values_for_keys( + handle_.get_comms(), + cluster_keys_v_.begin(), + cluster_keys_v_.end(), + cluster_weights_v_.data(), + d_src_cluster_cache_, + d_src_cluster_cache_ + src_cluster_cache_v_.size(), + vertex_to_gpu_id_op, + handle_.get_stream()); + + dst_cluster_weights_v = cugraph::experimental::collect_values_for_keys( + handle_.get_comms(), + cluster_keys_v_.begin(), + cluster_keys_v_.end(), + cluster_weights_v_.data(), + d_dst_cluster_cache_, + d_dst_cluster_cache_ + dst_cluster_cache_v_.size(), + vertex_to_gpu_id_op, + handle_.get_stream()); + + map_key_first = d_dst_cluster_cache_; + map_key_last = d_dst_cluster_cache_ + dst_cluster_cache_v_.size(); + map_value_first = dst_cluster_weights_v.begin(); + } else { + thrust::sort_by_key(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + cluster_keys_v_.begin(), + cluster_keys_v_.end(), + cluster_weights_v_.begin()); + + thrust::transform(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + next_cluster_v.begin(), + next_cluster_v.end(), + src_cluster_weights_v.begin(), + [d_cluster_weights = cluster_weights_v_.data(), + d_cluster_keys = cluster_keys_v_.data(), + num_clusters = cluster_keys_v_.size()] __device__(vertex_t cluster) { + auto pos = thrust::lower_bound( + thrust::seq, d_cluster_keys, d_cluster_keys + num_clusters, cluster); + return d_cluster_weights[pos - d_cluster_keys]; + }); + + map_key_first = d_src_cluster_cache_; + map_key_last = d_src_cluster_cache_ + src_cluster_weights_v.size(); + map_value_first = src_cluster_weights_v.begin(); + } + + copy_v_transform_reduce_key_aggregated_out_nbr( handle_, - local_cluster_edge_ids_v.size(), - thrust::make_permutation_iterator(final_src_v.begin(), local_cluster_edge_ids_v.begin()), - thrust::make_transform_iterator( - thrust::make_permutation_iterator(final_src_v.begin(), local_cluster_edge_ids_v.begin()), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - })); - - // - // At this point... - // final_src_v contains the source indices - // final_nbr_cluster_v contains the neighboring clusters - // final_nbr_weights_v contains delta_Q for moving src to the neighboring - // - // They have been shuffled to the gpus responsible for their source vertex - // - // FIXME: Think about how this should work. - // I think Leiden is broken. I don't think that the code we have - // actually does anything. For now I'm going to ignore Leiden in - // MNMG, we can reconsider this later. - // - // If we ignore Leiden, I'd like to think about whether the reduction - // should occur now... 
- // - - // - // Sort the results, pick the largest version - // - thrust::sort(rmm::exec_policy(stream_)->on(stream_), - thrust::make_zip_iterator(thrust::make_tuple( - final_src_v.begin(), final_nbr_weights_v.begin(), final_nbr_cluster_v.begin())), - thrust::make_zip_iterator(thrust::make_tuple( - final_src_v.end(), final_nbr_weights_v.end(), final_nbr_cluster_v.begin())), - [] __device__(auto left, auto right) { - if (thrust::get<0>(left) < thrust::get<0>(right)) return true; - if (thrust::get<0>(left) > thrust::get<0>(right)) return false; - if (thrust::get<1>(left) > thrust::get<1>(right)) return true; - if (thrust::get<1>(left) < thrust::get<1>(right)) return false; - return (thrust::get<2>(left) < thrust::get<2>(right)); - }); - - // - // Now that we're sorted (ascending), the last entry for each src value is the largest. - // - local_cluster_edge_ids_v.resize(final_src_v.size()); - - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(final_src_v.size()), - local_cluster_edge_ids_v.begin(), - [sentinel = std::numeric_limits::max(), - d_src = final_src_v.data().get()] __device__(edge_t edge_id) { - if (edge_id == 0) { return edge_id; } - - if (d_src[edge_id - 1] != d_src[edge_id]) { return edge_id; } - - return sentinel; - }); - - local_cluster_edge_ids_v = detail::remove_elements_from_vector( - local_cluster_edge_ids_v, - [sentinel = std::numeric_limits::max()] __device__(auto edge_id) { - return (edge_id != sentinel); - }, - stream_); - - rmm::device_vector cluster_increase_v(final_src_v.size()); - rmm::device_vector cluster_decrease_v(final_src_v.size()); - rmm::device_vector old_cluster_v(final_src_v.size()); - - // - // Then we can, on each gpu, do a local assignment for all of the - // vertices assigned to that gpu using the up_down logic - // - local_cluster_edge_ids_v = detail::remove_elements_from_vector( - local_cluster_edge_ids_v, - local_cluster_edge_ids_v.begin(), - local_cluster_edge_ids_v.end(), - [d_final_src = final_src_v.data().get(), - d_final_nbr_cluster = final_nbr_cluster_v.data().get(), - d_final_nbr_weights = final_nbr_weights_v.data().get(), - d_cluster_increase = cluster_increase_v.data().get(), - d_cluster_decrease = cluster_decrease_v.data().get(), - d_vertex_weights = src_vertex_weights_cache_v_.data().get(), - d_next_cluster = next_cluster_v.data().get(), - d_old_cluster = old_cluster_v.data().get(), - base_vertex_id = base_vertex_id_, - base_src_vertex_id = base_src_vertex_id_, - up_down] __device__(edge_t idx) { - vertex_t src = d_final_src[idx]; - vertex_t new_cluster = d_final_nbr_cluster[idx]; - vertex_t old_cluster = d_next_cluster[src - base_vertex_id]; - weight_t src_weight = d_vertex_weights[src - base_src_vertex_id]; - - if (d_final_nbr_weights[idx] <= weight_t{0}) return false; - if (new_cluster == old_cluster) return false; - if ((new_cluster > old_cluster) != up_down) return false; - - d_next_cluster[src - base_vertex_id] = new_cluster; - d_cluster_increase[idx] = src_weight; - d_cluster_decrease[idx] = src_weight; - d_old_cluster[idx] = old_cluster; - return true; + current_graph_view_, + thrust::make_zip_iterator(thrust::make_tuple(old_cluster_sum_v.begin(), + d_src_vertex_weights_cache_, + cluster_subtract_v.begin(), + d_src_cluster_cache_, + src_cluster_weights_v.begin())), + + d_dst_cluster_cache_, + map_key_first, + map_key_last, + map_value_first, + [total_edge_weight, resolution] __device__( + auto src, auto neighbor_cluster, auto new_cluster_sum, auto 
src_info, auto a_new) { + auto old_cluster_sum = thrust::get<0>(src_info); + auto k_k = thrust::get<1>(src_info); + auto cluster_subtract = thrust::get<2>(src_info); + auto src_cluster = thrust::get<3>(src_info); + auto a_old = thrust::get<4>(src_info); + + if (src_cluster == neighbor_cluster) new_cluster_sum -= cluster_subtract; + + weight_t delta_modularity = 2 * (((new_cluster_sum - old_cluster_sum) / total_edge_weight) - + resolution * (a_new * k_k - a_old * k_k + k_k * k_k) / + (total_edge_weight * total_edge_weight)); + + return thrust::make_tuple(neighbor_cluster, delta_modularity); }, - stream_); + [] __device__(auto p1, auto p2) { + auto id1 = thrust::get<0>(p1); + auto id2 = thrust::get<0>(p2); + auto wt1 = thrust::get<1>(p1); + auto wt2 = thrust::get<1>(p2); - cluster_increase_v = variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_permutation_iterator(cluster_increase_v.begin(), - local_cluster_edge_ids_v.begin()), - thrust::make_transform_iterator( - thrust::make_permutation_iterator(final_nbr_cluster_v.begin(), - local_cluster_edge_ids_v.begin()), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - })); - - final_nbr_cluster_v = variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_permutation_iterator(final_nbr_cluster_v.begin(), - local_cluster_edge_ids_v.begin()), - thrust::make_transform_iterator( - thrust::make_permutation_iterator(final_nbr_cluster_v.begin(), - local_cluster_edge_ids_v.begin()), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - })); - - cluster_decrease_v = variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_permutation_iterator(cluster_decrease_v.begin(), - local_cluster_edge_ids_v.begin()), - thrust::make_transform_iterator( - thrust::make_permutation_iterator(old_cluster_v.begin(), local_cluster_edge_ids_v.begin()), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - })); - - old_cluster_v = variable_shuffle( - handle_, - local_cluster_edge_ids_v.size(), - thrust::make_permutation_iterator(old_cluster_v.begin(), local_cluster_edge_ids_v.begin()), - thrust::make_transform_iterator( - thrust::make_permutation_iterator(old_cluster_v.begin(), local_cluster_edge_ids_v.begin()), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - })); - - thrust::for_each(rmm::exec_policy(stream_)->on(stream_), - thrust::make_zip_iterator( - thrust::make_tuple(final_nbr_cluster_v.begin(), cluster_increase_v.begin())), - thrust::make_zip_iterator( - thrust::make_tuple(final_nbr_cluster_v.end(), cluster_increase_v.end())), - [d_cluster_weights = cluster_weights_v_.data().get(), - base_vertex_id = base_vertex_id_] __device__(auto p) { - vertex_t cluster_id = thrust::get<0>(p); - weight_t weight = thrust::get<1>(p); - - atomicAdd(d_cluster_weights + cluster_id - base_vertex_id, weight); - }); - - thrust::for_each( - rmm::exec_policy(stream_)->on(stream_), - thrust::make_zip_iterator( - thrust::make_tuple(old_cluster_v.begin(), cluster_decrease_v.begin())), - thrust::make_zip_iterator(thrust::make_tuple(old_cluster_v.end(), cluster_decrease_v.end())), - [d_cluster_weights = cluster_weights_v_.data().get(), - base_vertex_id = base_vertex_id_] __device__(auto p) { - vertex_t cluster_id = 
thrust::get<0>(p); - weight_t weight = thrust::get<1>(p); - - atomicAdd(d_cluster_weights + cluster_id - base_vertex_id, -weight); + return (wt1 < wt2) ? p2 : ((wt1 > wt2) ? p1 : ((id1 < id2) ? p1 : p2)); + }, + thrust::make_tuple(vertex_t{-1}, weight_t{0}), + cugraph::experimental::get_dataframe_buffer_begin>( + output_buffer)); + + thrust::transform( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + next_cluster_v.begin(), + next_cluster_v.end(), + cugraph::experimental::get_dataframe_buffer_begin>( + output_buffer), + next_cluster_v.begin(), + [up_down] __device__(vertex_t old_cluster, auto p) { + vertex_t new_cluster = thrust::get<0>(p); + weight_t delta_modularity = thrust::get<1>(p); + + return (delta_modularity > weight_t{0}) + ? (((new_cluster > old_cluster) != up_down) ? old_cluster : new_cluster) + : old_cluster; }); - cache_vertex_properties( - cluster_weights_v_.begin(), src_cluster_weights_cache_v_, dst_cluster_weights_cache_v_); - } - - template - std::pair, rmm::device_vector> - combine_local_src_nbr_cluster_weights(hash_t hasher, - compare_t compare, - skip_edge_t skip_edge, - weight_t const *d_weights, - count_t num_weights) - { - rmm::device_vector relevant_edges_v; - rmm::device_vector relevant_edge_weights_v; - - if (num_weights > 0) { - std::size_t capacity{static_cast(num_weights / 0.7)}; - - cuco::static_map hash_map( - capacity, std::numeric_limits::max(), count_t{0}); - detail::create_cuco_pair_t create_cuco_pair; - - CUDA_TRY(cudaStreamSynchronize(stream_)); - - hash_map.insert(thrust::make_transform_iterator(thrust::make_counting_iterator(0), - create_cuco_pair), - thrust::make_transform_iterator( - thrust::make_counting_iterator(num_weights), create_cuco_pair), - hasher, - compare); - - CUDA_TRY(cudaStreamSynchronize(stream_)); - - relevant_edges_v.resize(num_weights); - - relevant_edges_v = detail::remove_elements_from_vector( - relevant_edges_v, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_weights), - [d_hash_map = hash_map.get_device_view(), hasher, compare] __device__(count_t idx) { - auto pos = d_hash_map.find(idx, hasher, compare); - return (pos->first == idx); - }, - stream_); - - thrust::for_each_n( - rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - relevant_edges_v.size(), - [d_hash_map = hash_map.get_device_view(), - hasher, - compare, - d_relevant_edges = relevant_edges_v.data().get()] __device__(count_t idx) mutable { - count_t edge_id = d_relevant_edges[idx]; - auto pos = d_hash_map.find(edge_id, hasher, compare); - pos->second.store(idx); - }); - - relevant_edge_weights_v.resize(relevant_edges_v.size()); - thrust::fill(rmm::exec_policy(stream_)->on(stream_), - relevant_edge_weights_v.begin(), - relevant_edge_weights_v.end(), - weight_t{0}); - - thrust::for_each_n( - rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - num_weights, - [d_hash_map = hash_map.get_device_view(), - hasher, - compare, - skip_edge, - d_relevant_edge_weights = relevant_edge_weights_v.data().get(), - d_weights] __device__(count_t idx) { - if (!skip_edge(idx)) { - auto pos = d_hash_map.find(idx, hasher, compare); - if (pos != d_hash_map.end()) { - atomicAdd(d_relevant_edge_weights + pos->second.load(cuda::std::memory_order_relaxed), - d_weights[idx]); - } - } - }); - } + d_src_cluster_cache_ = cache_src_vertex_properties(next_cluster_v, src_cluster_cache_v_); + d_dst_cluster_cache_ = cache_dst_vertex_properties(next_cluster_v, dst_cluster_cache_v_); - return 
std::make_pair(relevant_edges_v, relevant_edge_weights_v); - } + std::tie(cluster_keys_v_, cluster_weights_v_) = + cugraph::experimental::transform_reduce_by_adj_matrix_row_key_e( + handle_, + current_graph_view_, + thrust::make_constant_iterator(0), + thrust::make_constant_iterator(0), + d_src_cluster_cache_, + [] __device__(auto src, auto dst, auto wt, auto x, auto y) { return wt; }, + weight_t{0}); #endif + } void shrink_graph() { timer_start("shrinking graph"); - rmm::device_uvector numbering_map(0, stream_); + rmm::device_uvector numbering_map(0, handle_.get_stream()); std::tie(current_graph_, numbering_map) = coarsen_graph(handle_, current_graph_view_, dendrogram_->current_level_begin()); current_graph_view_ = current_graph_->view(); - local_num_vertices_ = current_graph_view_.get_number_of_local_vertices(); - local_num_rows_ = current_graph_view_.get_number_of_local_adj_matrix_partition_rows(); - local_num_cols_ = current_graph_view_.get_number_of_local_adj_matrix_partition_cols(); - base_vertex_id_ = current_graph_view_.get_local_vertex_first(); - - local_num_edges_ = thrust::transform_reduce( - thrust::host, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator( - current_graph_view_.get_number_of_local_adj_matrix_partitions()), - [this](auto indx) { - return current_graph_view_.get_number_of_local_adj_matrix_partition_edges(indx); - }, - size_t{0}, - thrust::plus()); - - src_indices_v_.resize(local_num_edges_); - - cugraph::detail::offsets_to_indices( - current_graph_view_.offsets(), local_num_rows_, src_indices_v_.data().get()); - - rmm::device_uvector numbering_indices(numbering_map.size(), stream_); - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), + rmm::device_uvector numbering_indices(numbering_map.size(), handle_.get_stream()); + thrust::sequence(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), numbering_indices.begin(), numbering_indices.end(), - base_vertex_id_); + current_graph_view_.get_local_vertex_first()); relabel( handle_, std::make_tuple(static_cast(numbering_map.begin()), static_cast(numbering_indices.begin())), - local_num_vertices_, + current_graph_view_.get_number_of_local_vertices(), dendrogram_->current_level_begin(), dendrogram_->current_level_size()); - timer_stop(stream_); + timer_stop(handle_.get_stream()); } protected: raft::handle_t const &handle_; - cudaStream_t stream_; std::unique_ptr> dendrogram_; - vertex_t number_of_vertices_; - vertex_t base_vertex_id_{0}; - vertex_t base_src_vertex_id_{0}; - vertex_t base_dst_vertex_id_{0}; - int rank_{0}; - - vertex_t local_num_vertices_; - vertex_t local_num_rows_; - vertex_t local_num_cols_; - edge_t local_num_edges_; - // - // Copy of graph + // Initially we run on the input graph view, + // but as we shrink the graph we'll keep the + // current graph here // std::unique_ptr current_graph_{}; graph_view_t current_graph_view_; - // - // For partitioning - // - detail::compute_partition_t compute_partition_; + rmm::device_uvector vertex_weights_v_; + rmm::device_uvector src_vertex_weights_cache_v_; + rmm::device_uvector src_cluster_cache_v_; + rmm::device_uvector dst_cluster_cache_v_; + rmm::device_uvector cluster_keys_v_; + rmm::device_uvector cluster_weights_v_; - rmm::device_vector src_indices_v_; - - // - // Weights and clustering across iterations of algorithm - // - rmm::device_vector vertex_weights_v_; - rmm::device_vector src_vertex_weights_cache_v_{}; - rmm::device_vector dst_vertex_weights_cache_v_{}; - - rmm::device_vector cluster_weights_v_; - 
rmm::device_vector src_cluster_weights_cache_v_{}; - rmm::device_vector dst_cluster_weights_cache_v_{}; - - rmm::device_vector src_cluster_cache_v_{}; - rmm::device_vector dst_cluster_cache_v_{}; - - rmm::device_vector empty_cache_weight_v_{}; + weight_t *d_src_vertex_weights_cache_; + vertex_t *d_src_cluster_cache_; + vertex_t *d_dst_cluster_cache_; #ifdef TIMING HighResTimer hr_timer_; #endif -}; // namespace experimental +}; } // namespace experimental } // namespace cugraph diff --git a/cpp/src/experimental/shuffle.cuh b/cpp/src/experimental/shuffle.cuh deleted file mode 100644 index 40f3b510b10..00000000000 --- a/cpp/src/experimental/shuffle.cuh +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include - -namespace cugraph { -namespace experimental { - -namespace detail { - -// -// FIXME: This implementation of variable_shuffle stages the data for transfer -// in host memory. It would be more efficient, I believe, to stage the -// data in device memory, but it would require actually instantiating -// the data in device memory which is already precious in the Louvain -// implementation. We should explore if it's actually more efficient -// through device memory and whether the improvement is worth the extra -// memory required. 
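-//
-// Example use (a sketch; vertices_v and owner_op are placeholder names,
-// where owner_op maps a vertex id to the rank of the GPU that owns it):
-//
-//   rmm::device_vector<vertex_t> shuffled_v =
-//     variable_shuffle<graph_t::is_multi_gpu, vertex_t>(
-//       handle,
-//       vertices_v.size(),
-//       vertices_v.begin(),
-//       thrust::make_transform_iterator(vertices_v.begin(), owner_op));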
-// -template -rmm::device_vector variable_shuffle(raft::handle_t const &handle, - std::size_t n_elements, - iterator_t data_iter, - partition_iter_t partition_iter) -{ - // - // We need to compute the size of data movement - // - raft::comms::comms_t const &comms = handle.get_comms(); - - cudaStream_t stream = handle.get_stream(); - int num_gpus = comms.get_size(); - int my_gpu = comms.get_rank(); - - rmm::device_vector local_sizes_v(num_gpus, size_t{0}); - - thrust::for_each(rmm::exec_policy(stream)->on(stream), - partition_iter, - partition_iter + n_elements, - [num_gpus, d_local_sizes = local_sizes_v.data().get()] __device__(auto p) { - atomicAdd(d_local_sizes + p, size_t{1}); - }); - - std::vector h_local_sizes_v(num_gpus); - std::vector h_global_sizes_v(num_gpus); - std::vector h_input_v(n_elements); - std::vector h_partitions_v(n_elements); - - thrust::copy(local_sizes_v.begin(), local_sizes_v.end(), h_local_sizes_v.begin()); - thrust::copy(partition_iter, partition_iter + n_elements, h_partitions_v.begin()); - - std::vector requests(2 * num_gpus); - - int request_pos = 0; - - for (int gpu = 0; gpu < num_gpus; ++gpu) { - if (gpu != my_gpu) { - comms.irecv(&h_global_sizes_v[gpu], 1, gpu, 0, &requests[request_pos]); - ++request_pos; - comms.isend(&h_local_sizes_v[gpu], 1, gpu, 0, &requests[request_pos]); - ++request_pos; - } else { - h_global_sizes_v[gpu] = h_local_sizes_v[gpu]; - } - } - - if (request_pos > 0) { comms.waitall(request_pos, requests.data()); } - - comms.barrier(); - - // - // Now global_sizes contains all of the counts, we need to - // allocate an array of the appropriate size - // - int64_t receive_size = - thrust::reduce(thrust::host, h_global_sizes_v.begin(), h_global_sizes_v.end()); - - std::vector temp_data; - - if (receive_size > 0) temp_data.resize(receive_size); - - rmm::device_vector input_v(n_elements); - - auto input_start = input_v.begin(); - - for (int gpu = 0; gpu < num_gpus; ++gpu) { - input_start = thrust::copy_if(rmm::exec_policy(stream)->on(stream), - data_iter, - data_iter + n_elements, - partition_iter, - input_start, - [gpu] __device__(int32_t p) { return p == gpu; }); - } - - thrust::copy(input_v.begin(), input_v.end(), h_input_v.begin()); - - std::vector temp_v(num_gpus + 1); - - thrust::exclusive_scan( - thrust::host, h_global_sizes_v.begin(), h_global_sizes_v.end(), temp_v.begin()); - - temp_v[num_gpus] = temp_v[num_gpus - 1] + h_global_sizes_v[num_gpus - 1]; - h_global_sizes_v = temp_v; - - thrust::exclusive_scan( - thrust::host, h_local_sizes_v.begin(), h_local_sizes_v.end(), temp_v.begin()); - - temp_v[num_gpus] = temp_v[num_gpus - 1] + h_local_sizes_v[num_gpus - 1]; - h_local_sizes_v = temp_v; - - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - comms.barrier(); - - request_pos = 0; - - for (int gpu = 0; gpu < num_gpus; ++gpu) { - size_t to_receive = h_global_sizes_v[gpu + 1] - h_global_sizes_v[gpu]; - size_t to_send = h_local_sizes_v[gpu + 1] - h_local_sizes_v[gpu]; - - if (gpu != my_gpu) { - if (to_receive > 0) { - comms.irecv( - temp_data.data() + h_global_sizes_v[gpu], to_receive, gpu, 0, &requests[request_pos]); - ++request_pos; - } - - if (to_send > 0) { - comms.isend( - h_input_v.data() + h_local_sizes_v[gpu], to_send, gpu, 0, &requests[request_pos]); - ++request_pos; - } - } else if (to_receive > 0) { - std::copy(h_input_v.begin() + h_local_sizes_v[gpu], - h_input_v.begin() + h_local_sizes_v[gpu + 1], - temp_data.begin() + h_global_sizes_v[gpu]); - } - } - - comms.barrier(); - - if (request_pos > 0) { 
comms.waitall(request_pos, requests.data()); } - - comms.barrier(); - - return rmm::device_vector(temp_data); -} - -} // namespace detail - -/** - * @brief shuffle data to the desired partition - * - * MNMG algorithms require shuffling data between partitions - * to get the data to the right location for computation. - * - * This function operates dynamically, there is no - * a priori knowledge about where the data will need - * to be transferred. - * - * This function will be executed on each GPU. Each gpu - * has a portion of the data (specified by begin_data and - * end_data iterators) and an iterator that identifies - * (for each corresponding element) which GPU the data - * should be shuffled to. - * - * The return value will be a device vector containing - * the data received by this GPU. - * - * Note that this function accepts iterators as input. - * `partition_iterator` will be traversed multiple times. - * - * @tparam is_multi_gpu If true, multi-gpu - shuffle will occur - * If false, single GPU - simple copy will occur - * @tparam data_t Type of the data being shuffled - * @tparam iterator_t Iterator referencing data to be shuffled - * @tparam partition_iter_t Iterator identifying the destination partition - * - * @param handle Library handle (RAFT) - * @param n_elements Number of elements to transfer - * @param data_iter Iterator that returns the elements to be transfered - * @param partition_iter Iterator that returns the partition where elements - * should be transfered. - */ -template * = nullptr> -rmm::device_vector variable_shuffle(raft::handle_t const &handle, - std::size_t n_elements, - iterator_t data_iter, - partition_iter_t partition_iter) -{ - return detail::variable_shuffle(handle, n_elements, data_iter, partition_iter); -} - -template * = nullptr> -rmm::device_vector variable_shuffle(raft::handle_t const &handle, - std::size_t n_elements, - iterator_t data_iter, - partition_iter_t partition_iter) -{ - return rmm::device_vector(data_iter, data_iter + n_elements); -} - -} // namespace experimental -} // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 1dc4a5d3eaa..5292f9f9997 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -407,14 +407,6 @@ set(EXPERIMENTAL_PAGERANK_TEST_SRCS ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}") -################################################################################################### -# - Experimental LOUVAIN tests ------------------------------------------------------------------- - -set(EXPERIMENTAL_LOUVAIN_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/louvain_test.cu") - -ConfigureTest(EXPERIMENTAL_LOUVAIN_TEST "${EXPERIMENTAL_LOUVAIN_TEST_SRCS}") - ################################################################################################### # - Experimental KATZ_CENTRALITY tests ------------------------------------------------------------ @@ -438,6 +430,17 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureTest(MG_PAGERANK_TEST "${MG_PAGERANK_TEST_SRCS}") target_link_libraries(MG_PAGERANK_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + ########################################################################################### + # - MG LOUVAIN tests --------------------------------------------------------------------- + + set(MG_LOUVAIN_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/community/mg_louvain_helper.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/community/mg_louvain_test.cpp") + + ConfigureTest(MG_LOUVAIN_TEST "${MG_LOUVAIN_TEST_SRCS}") + 
target_link_libraries(MG_LOUVAIN_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + target_link_libraries(MG_LOUVAIN_TEST PRIVATE cugraph) + else(MPI_CXX_FOUND) message(FATAL_ERROR "OpenMPI NOT found, cannot build MG tests.") endif(MPI_CXX_FOUND) diff --git a/cpp/tests/community/louvain_test.cpp b/cpp/tests/community/louvain_test.cpp index d3024282be3..2ebf9a85902 100644 --- a/cpp/tests/community/louvain_test.cpp +++ b/cpp/tests/community/louvain_test.cpp @@ -9,15 +9,157 @@ * */ #include +#include + +#include +#include +#include +#include + +#include #include -#include -#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +struct Louvain_Usecase { + std::string graph_file_full_path_{}; + bool test_weighted_{false}; + int expected_level_{0}; + float expected_modularity_{0}; + + Louvain_Usecase(std::string const& graph_file_path, + bool test_weighted, + int expected_level, + float expected_modularity) + : test_weighted_(test_weighted), + expected_level_(expected_level), + expected_modularity_(expected_modularity) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path_ = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path_ = graph_file_path; + } + }; +}; + +class Tests_Louvain : public ::testing::TestWithParam { + public: + Tests_Louvain() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_legacy_test(Louvain_Usecase const& configuration) + { + raft::handle_t handle{}; + + bool directed{false}; + + auto graph = cugraph::test::generate_graph_csr_from_mm( + directed, configuration.graph_file_full_path_); + auto graph_view = graph->view(); + + // "FIXME": remove this check once we drop support for Pascal + // + // Calling louvain on Pascal will throw an exception, we'll check that + // this is the behavior while we still support Pascal (device_prop.major < 7) + // + cudaDeviceProp device_prop; + CUDA_CHECK(cudaGetDeviceProperties(&device_prop, 0)); + + if (device_prop.major < 7) { + EXPECT_THROW(louvain(graph_view, + graph_view.get_number_of_vertices(), + configuration.expected_level_, + configuration.expected_modularity_), + cugraph::logic_error); + } else { + louvain(graph_view, + graph_view.get_number_of_vertices(), + configuration.expected_level_, + configuration.expected_modularity_); + } + } -#include + template + void run_current_test(Louvain_Usecase const& configuration) + { + raft::handle_t handle{}; + + cugraph::experimental::graph_t graph(handle); + std::tie(graph, std::ignore) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path_, configuration.test_weighted_, false); + + auto graph_view = graph.view(); + + // "FIXME": remove this check once we drop support for Pascal + // + // Calling louvain on Pascal will throw an exception, we'll check that + // this is the behavior while we still support Pascal (device_prop.major < 7) + // + cudaDeviceProp device_prop; + CUDA_CHECK(cudaGetDeviceProperties(&device_prop, 0)); + + if (device_prop.major < 7) { + EXPECT_THROW(louvain(graph_view, + graph_view.get_number_of_local_vertices(), + configuration.expected_level_, + configuration.expected_modularity_), + cugraph::logic_error); + } else { + louvain(graph_view, + graph_view.get_number_of_local_vertices(), + configuration.expected_level_, + configuration.expected_modularity_); + } + } + + template + void 
louvain(graph_t const& graph_view, + typename graph_t::vertex_type num_vertices, + int expected_level, + float expected_modularity) + { + using vertex_t = typename graph_t::vertex_type; + using weight_t = typename graph_t::weight_type; + + raft::handle_t handle{}; + + rmm::device_uvector clustering_v(num_vertices, handle.get_stream()); + size_t level; + weight_t modularity; + + std::tie(level, modularity) = + cugraph::louvain(handle, graph_view, clustering_v.data(), size_t{100}, weight_t{1}); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + float compare_modularity = static_cast(modularity); -TEST(louvain, success) + ASSERT_FLOAT_EQ(compare_modularity, expected_modularity); + ASSERT_EQ(level, expected_level); + } +}; + +// FIXME: add tests for type combinations + +TEST(louvain_legacy, success) { raft::handle_t handle; @@ -84,15 +226,13 @@ TEST(louvain, success) int min = *min_element(cluster_id.begin(), cluster_id.end()); - std::cout << "modularity = " << modularity << std::endl; - ASSERT_GE(min, 0); - ASSERT_GE(modularity, 0.402777 * 0.95); + ASSERT_FLOAT_EQ(modularity, 0.408695); ASSERT_EQ(cluster_id, result_h); } } -TEST(louvain_renumbered, success) +TEST(louvain_legacy_renumbered, success) { raft::handle_t handle; @@ -157,11 +297,25 @@ TEST(louvain_renumbered, success) int min = *min_element(cluster_id.begin(), cluster_id.end()); - std::cout << "modularity = " << modularity << std::endl; - ASSERT_GE(min, 0); - ASSERT_GE(modularity, 0.402777 * 0.95); + ASSERT_FLOAT_EQ(modularity, 0.41880345); } } +TEST_P(Tests_Louvain, CheckInt32Int32FloatFloatLegacy) +{ + run_legacy_test(GetParam()); +} + +TEST_P(Tests_Louvain, CheckInt32Int32FloatFloat) +{ + run_current_test(GetParam()); +} + +// FIXME: Expand testing once we evaluate RMM memory use +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_Louvain, + ::testing::Values(Louvain_Usecase("test/datasets/karate.mtx", true, 3, 0.408695))); + CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/community/mg_louvain_helper.cu b/cpp/tests/community/mg_louvain_helper.cu new file mode 100644 index 00000000000..a7f95e6d718 --- /dev/null +++ b/cpp/tests/community/mg_louvain_helper.cu @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mg_louvain_helper.hpp" + +#include + +#include +#include +#include + +#include + +#include +#include +#include + +namespace cugraph { +namespace test { + +template +rmm::device_uvector gather_distributed_vector(raft::handle_t const &handle, + T const *d_input, + size_t size) +{ + auto rx_sizes = + cugraph::experimental::host_scalar_gather(handle.get_comms(), size, 0, handle.get_stream()); + std::vector rx_displs(static_cast(handle.get_comms().get_rank()) == 0 + ? 
handle.get_comms().get_size() + : int{0}, + size_t{0}); + if (static_cast(handle.get_comms().get_rank()) == 0) { + std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1); + } + + auto total_size = thrust::reduce(thrust::host, rx_sizes.begin(), rx_sizes.end()); + rmm::device_uvector gathered_v(total_size, handle.get_stream()); + + cugraph::experimental::device_gatherv(handle.get_comms(), + d_input, + gathered_v.data(), + size, + rx_sizes, + rx_displs, + 0, + handle.get_stream()); + + return gathered_v; +} + +template +bool compare_renumbered_vectors(raft::handle_t const &handle, + rmm::device_uvector const &v1, + rmm::device_uvector const &v2) +{ + vertex_t max = 1 + thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + v1.begin(), + v1.end(), + vertex_t{0}); + + rmm::device_uvector map(max, size_t{0}); + + auto iter = thrust::make_zip_iterator(thrust::make_tuple(v1.begin(), v2.begin())); + + thrust::for_each(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + iter, + iter + v1.size(), + [d_map = map.data()] __device__(auto pair) { + vertex_t e1 = thrust::get<0>(pair); + vertex_t e2 = thrust::get<1>(pair); + + d_map[e1] = e2; + }); + + auto error_count = + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + iter, + iter + v1.size(), + [d_map = map.data()] __device__(auto pair) { + vertex_t e1 = thrust::get<0>(pair); + vertex_t e2 = thrust::get<1>(pair); + + return (d_map[e1] != e2); + }); + + return (error_count == 0); +} + +template +void single_gpu_renumber_edgelist_given_number_map(raft::handle_t const &handle, + rmm::device_uvector &edgelist_rows_v, + rmm::device_uvector &edgelist_cols_v, + rmm::device_uvector &renumber_map_gathered_v) +{ + rmm::device_uvector index_v(renumber_map_gathered_v.size(), handle.get_stream()); + + thrust::for_each( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(renumber_map_gathered_v.size()), + [d_renumber_map_gathered = renumber_map_gathered_v.data(), d_index = index_v.data()] __device__( + auto idx) { d_index[d_renumber_map_gathered[idx]] = idx; }); + + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_rows_v.begin(), + edgelist_rows_v.end(), + edgelist_rows_v.begin(), + [d_index = index_v.data()] __device__(auto v) { return d_index[v]; }); + + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_cols_v.begin(), + edgelist_cols_v.end(), + edgelist_cols_v.begin(), + [d_index = index_v.data()] __device__(auto v) { return d_index[v]; }); +} + +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + compressed_sparse_to_edgelist(edge_t const *compressed_sparse_offsets, + vertex_t const *compressed_sparse_indices, + weight_t const *compressed_sparse_weights, + vertex_t major_first, + vertex_t major_last, + cudaStream_t stream) +{ + edge_t number_of_edges{0}; + raft::update_host( + &number_of_edges, compressed_sparse_offsets + (major_last - major_first), 1, stream); + CUDA_TRY(cudaStreamSynchronize(stream)); + rmm::device_uvector edgelist_major_vertices(number_of_edges, stream); + rmm::device_uvector edgelist_minor_vertices(number_of_edges, stream); + rmm::device_uvector edgelist_weights( + compressed_sparse_weights != nullptr ? 
number_of_edges : 0, stream); + + // FIXME: this is highly inefficient for very high-degree vertices, for better performance, we can + // fill high-degree vertices using one CUDA block per vertex, mid-degree vertices using one CUDA + // warp per vertex, and low-degree vertices using one CUDA thread per block + thrust::for_each(rmm::exec_policy(stream)->on(stream), + thrust::make_counting_iterator(major_first), + thrust::make_counting_iterator(major_last), + [compressed_sparse_offsets, + major_first, + p_majors = edgelist_major_vertices.begin()] __device__(auto v) { + auto first = compressed_sparse_offsets[v - major_first]; + auto last = compressed_sparse_offsets[v - major_first + 1]; + thrust::fill(thrust::seq, p_majors + first, p_majors + last, v); + }); + thrust::copy(rmm::exec_policy(stream)->on(stream), + compressed_sparse_indices, + compressed_sparse_indices + number_of_edges, + edgelist_minor_vertices.begin()); + if (compressed_sparse_weights != nullptr) { + thrust::copy(rmm::exec_policy(stream)->on(stream), + compressed_sparse_weights, + compressed_sparse_weights + number_of_edges, + edgelist_weights.data()); + } + + return std::make_tuple(std::move(edgelist_major_vertices), + std::move(edgelist_minor_vertices), + std::move(edgelist_weights)); +} + +template +void sort_and_coarsen_edgelist(rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_weights /* [INOUT] */, + cudaStream_t stream) +{ + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + + size_t number_of_edges{0}; + if (edgelist_weights.size() > 0) { + thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size(), + edgelist_weights.begin()); + + rmm::device_uvector tmp_edgelist_major_vertices(edgelist_major_vertices.size(), + stream); + rmm::device_uvector tmp_edgelist_minor_vertices(tmp_edgelist_major_vertices.size(), + stream); + rmm::device_uvector tmp_edgelist_weights(tmp_edgelist_major_vertices.size(), stream); + auto it = thrust::reduce_by_key( + rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size(), + edgelist_weights.begin(), + thrust::make_zip_iterator(thrust::make_tuple(tmp_edgelist_major_vertices.begin(), + tmp_edgelist_minor_vertices.begin())), + tmp_edgelist_weights.begin()); + number_of_edges = thrust::distance(tmp_edgelist_weights.begin(), thrust::get<1>(it)); + + edgelist_major_vertices = std::move(tmp_edgelist_major_vertices); + edgelist_minor_vertices = std::move(tmp_edgelist_minor_vertices); + edgelist_weights = std::move(tmp_edgelist_weights); + } else { + thrust::sort(rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size()); + auto it = thrust::unique(rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size()); + number_of_edges = thrust::distance(pair_first, it); + } + + edgelist_major_vertices.resize(number_of_edges, stream); + edgelist_minor_vertices.resize(number_of_edges, stream); + edgelist_weights.resize(number_of_edges, stream); + edgelist_major_vertices.shrink_to_fit(stream); + edgelist_minor_vertices.shrink_to_fit(stream); + edgelist_weights.shrink_to_fit(stream); +} + +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + edge_t const 
*compressed_sparse_offsets, + vertex_t const *compressed_sparse_indices, + weight_t const *compressed_sparse_weights, + vertex_t const *p_major_labels, + vertex_t const *p_minor_labels, + vertex_t major_first, + vertex_t major_last, + vertex_t minor_first, + vertex_t minor_last, + cudaStream_t stream) +{ + // FIXME: it might be possible to directly create relabled & coarsened edgelist from the + // compressed sparse format to save memory + + rmm::device_uvector edgelist_major_vertices(0, stream); + rmm::device_uvector edgelist_minor_vertices(0, stream); + rmm::device_uvector edgelist_weights(0, stream); + std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = + compressed_sparse_to_edgelist(compressed_sparse_offsets, + compressed_sparse_indices, + compressed_sparse_weights, + major_first, + major_last, + stream); + + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + thrust::transform( + rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size(), + pair_first, + [p_major_labels, p_minor_labels, major_first, minor_first] __device__(auto val) { + return thrust::make_tuple(p_major_labels[thrust::get<0>(val) - major_first], + p_minor_labels[thrust::get<1>(val) - minor_first]); + }); + + sort_and_coarsen_edgelist( + edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights, stream); + + return std::make_tuple(std::move(edgelist_major_vertices), + std::move(edgelist_minor_vertices), + std::move(edgelist_weights)); +} + +// single-GPU version +template +std::unique_ptr> +coarsen_graph( + raft::handle_t const &handle, + cugraph::experimental::graph_view_t const + &graph_view, + vertex_t const *labels) +{ + rmm::device_uvector coarsened_edgelist_major_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_minor_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); + std::tie(coarsened_edgelist_major_vertices, + coarsened_edgelist_minor_vertices, + coarsened_edgelist_weights) = + compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + graph_view.offsets(), + graph_view.indices(), + graph_view.weights(), + labels, + labels, + vertex_t{0}, + graph_view.get_number_of_vertices(), + vertex_t{0}, + graph_view.get_number_of_vertices(), + handle.get_stream()); + + cugraph::experimental::edgelist_t edgelist{}; + edgelist.p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() + : coarsened_edgelist_major_vertices.data(); + edgelist.p_dst_vertices = store_transposed ? 
coarsened_edgelist_major_vertices.data() + : coarsened_edgelist_minor_vertices.data(); + edgelist.p_edge_weights = coarsened_edgelist_weights.data(); + edgelist.number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); + + vertex_t new_number_of_vertices = + 1 + thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels, + labels + graph_view.get_number_of_vertices(), + vertex_t{0}, + thrust::maximum()); + + return std::make_unique< + cugraph::experimental::graph_t>( + handle, + edgelist, + new_number_of_vertices, + cugraph::experimental::graph_properties_t{graph_view.is_symmetric(), false}, + true); +} + +// explicit instantiation + +template void single_gpu_renumber_edgelist_given_number_map( + raft::handle_t const &handle, + rmm::device_uvector &d_edgelist_rows, + rmm::device_uvector &d_edgelist_cols, + rmm::device_uvector &d_renumber_map_gathered_v); + +template rmm::device_uvector gather_distributed_vector(raft::handle_t const &handle, + int const *d_input, + size_t size); + +template bool compare_renumbered_vectors(raft::handle_t const &handle, + rmm::device_uvector const &v1, + rmm::device_uvector const &v2); + +template std::unique_ptr> +coarsen_graph( + raft::handle_t const &handle, + cugraph::experimental::graph_view_t const &graph_view, + int32_t const *labels); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/community/mg_louvain_helper.hpp b/cpp/tests/community/mg_louvain_helper.hpp new file mode 100644 index 00000000000..43eb294cd13 --- /dev/null +++ b/cpp/tests/community/mg_louvain_helper.hpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include + +namespace cugraph { +namespace test { + +template +rmm::device_uvector gather_distributed_vector(raft::handle_t const &handle, + T const *d_input, + size_t size); + +template +bool compare_renumbered_vectors(raft::handle_t const &handle, + rmm::device_uvector const &v1, + rmm::device_uvector const &v2); + +template +void single_gpu_renumber_edgelist_given_number_map( + raft::handle_t const &handle, + rmm::device_uvector &d_edgelist_rows, + rmm::device_uvector &d_edgelist_cols, + rmm::device_uvector &d_renumber_map_gathered_v); + +template +std::unique_ptr> +coarsen_graph( + raft::handle_t const &handle, + cugraph::experimental::graph_view_t const + &graph_view, + vertex_t const *labels); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/community/mg_louvain_test.cpp b/cpp/tests/community/mg_louvain_test.cpp new file mode 100644 index 00000000000..f6596a6b59a --- /dev/null +++ b/cpp/tests/community/mg_louvain_test.cpp @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mg_louvain_helper.hpp" + +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +#include + +void compare(float modularity, float sg_modularity) { ASSERT_FLOAT_EQ(modularity, sg_modularity); } +void compare(double modularity, double sg_modularity) +{ + ASSERT_DOUBLE_EQ(modularity, sg_modularity); +} + +//////////////////////////////////////////////////////////////////////////////// +// Test param object. This defines the input and expected output for a test, and +// will be instantiated as the parameter to the tests defined below using +// INSTANTIATE_TEST_CASE_P() +// +struct Louvain_Usecase { + std::string graph_file_full_path{}; + bool weighted{false}; + size_t max_level; + double resolution; + + // FIXME: We really should have a Graph_Testparms_Base class or something + // like that which can handle this graph_full_path thing. + // + Louvain_Usecase(std::string const& graph_file_path, + bool weighted, + size_t max_level, + double resolution) + : weighted(weighted), max_level(max_level), resolution(resolution) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////// +// Parameterized test fixture, to be used with TEST_P(). This defines common +// setup and teardown steps as well as common utilities used by each E2E MG +// test. In this case, each test is identical except for the inputs and +// expected outputs, so the entire test is defined in the run_test() method. +// +class Louvain_MG_Testfixture : public ::testing::TestWithParam { + public: + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + // Run once for each test instance + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of MNMG Louvain with the results of running + // each step of SG Louvain, renumbering the coarsened graphs based + // on the MNMG renumbering. 
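+  //
+  // The comparison proceeds level by level:
+  //   1) gather the MNMG renumber map to rank 0 and build a single-GPU
+  //      graph from the original input, renumbered the same way
+  //   2) for each dendrogram level, gather that level to rank 0, run one
+  //      level of single-GPU Louvain on the current single-GPU graph, and
+  //      verify the two clusterings agree up to renumbering
+  //   3) coarsen the single-GPU graph with the gathered MNMG assignment
+  //      and repeat
+  //   4) finally compare the single-GPU and MNMG modularity scores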
+ template + void compare_sg_results(raft::handle_t const& handle, + std::string const& graph_filename, + rmm::device_uvector& d_renumber_map_gathered_v, + cugraph::Dendrogram const& dendrogram, + weight_t resolution, + int rank, + weight_t modularity) + { + auto sg_graph = + std::make_unique>( + handle); + rmm::device_uvector d_clustering_v(0, handle.get_stream()); + weight_t sg_modularity; + + if (rank == 0) { + // Create initial SG graph, renumbered according to the MNMG renumber map + rmm::device_uvector d_edgelist_rows(0, handle.get_stream()); + rmm::device_uvector d_edgelist_cols(0, handle.get_stream()); + rmm::device_uvector d_edgelist_weights(0, handle.get_stream()); + vertex_t number_of_vertices{}; + bool is_symmetric{}; + + std::tie( + d_edgelist_rows, d_edgelist_cols, d_edgelist_weights, number_of_vertices, is_symmetric) = + cugraph::test::read_edgelist_from_matrix_market_file( + handle, graph_filename, true); + + rmm::device_uvector d_vertices(number_of_vertices, handle.get_stream()); + std::vector h_vertices(number_of_vertices); + + d_clustering_v.resize(d_vertices.size(), handle.get_stream()); + + thrust::sequence(thrust::host, h_vertices.begin(), h_vertices.end(), vertex_t{0}); + raft::update_device( + d_vertices.data(), h_vertices.data(), d_vertices.size(), handle.get_stream()); + + // renumber using d_renumber_map_gathered_v + cugraph::test::single_gpu_renumber_edgelist_given_number_map( + handle, d_edgelist_rows, d_edgelist_cols, d_renumber_map_gathered_v); + + std::tie(*sg_graph, std::ignore) = + cugraph::test::generate_graph_from_edgelist( + handle, + std::move(d_vertices), + std::move(d_edgelist_rows), + std::move(d_edgelist_cols), + std::move(d_edgelist_weights), + is_symmetric, + true, + false); + } + + std::for_each( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(dendrogram.num_levels()), + [&dendrogram, &sg_graph, &d_clustering_v, &sg_modularity, &handle, resolution, rank]( + size_t i) { + auto d_dendrogram_gathered_v = cugraph::test::gather_distributed_vector( + handle, dendrogram.get_level_ptr_nocheck(i), dendrogram.get_level_size_nocheck(i)); + + if (rank == 0) { + auto graph_view = sg_graph->view(); + + d_clustering_v.resize(graph_view.get_number_of_vertices(), handle.get_stream()); + + std::tie(std::ignore, sg_modularity) = + cugraph::louvain(handle, graph_view, d_clustering_v.data(), size_t{1}, resolution); + + EXPECT_TRUE(cugraph::test::compare_renumbered_vectors( + handle, d_clustering_v, d_dendrogram_gathered_v)); + + sg_graph = + cugraph::test::coarsen_graph(handle, graph_view, d_dendrogram_gathered_v.data()); + } + }); + + if (rank == 0) compare(modularity, sg_modularity); + } + + // Compare the results of running louvain on multiple GPUs to that of a + // single-GPU run for the configuration in param. Note that MNMG Louvain + // and single GPU Louvain are ONLY deterministic through a single + // iteration of the outer loop. Renumbering of the partitions when coarsening + // the graph is a function of the number of GPUs in the GPU cluster. 
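+  //
+  // run_test() initializes the MPI communicator, builds the 2D row/column
+  // subcommunicators, reads and renumbers the MG input graph, runs MNMG
+  // Louvain, gathers the renumber map to rank 0, and then delegates the
+  // per-level verification to compare_sg_results() above.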
+ template + void run_test(const Louvain_Usecase& param) + { + raft::handle_t handle; + + raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); + const auto& comm = handle.get_comms(); + + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + auto row_comm_size = static_cast(sqrt(static_cast(comm_size))); + while (comm_size % row_comm_size != 0) { --row_comm_size; } + cugraph::partition_2d::subcomm_factory_t + subcomm_factory(handle, row_comm_size); + + cudaStream_t stream = handle.get_stream(); + + cugraph::experimental::graph_t mg_graph(handle); + + rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); + + std::tie(mg_graph, d_renumber_map_labels) = + cugraph::test::read_graph_from_matrix_market_file( + handle, param.graph_file_full_path, true, true); + + auto mg_graph_view = mg_graph.view(); + + std::unique_ptr> dendrogram; + weight_t modularity; + + std::tie(dendrogram, modularity) = + cugraph::louvain(handle, mg_graph_view, param.max_level, param.resolution); + + SCOPED_TRACE("compare modularity input: " + param.graph_file_full_path); + + auto d_renumber_map_gathered_v = cugraph::test::gather_distributed_vector( + handle, d_renumber_map_labels.data(), d_renumber_map_labels.size()); + + compare_sg_results(handle, + param.graph_file_full_path, + d_renumber_map_gathered_v, + *dendrogram, + param.resolution, + comm_rank, + modularity); + } +}; + +//////////////////////////////////////////////////////////////////////////////// +TEST_P(Louvain_MG_Testfixture, CheckInt32Int32Float) +{ + run_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P( + simple_test, + Louvain_MG_Testfixture, + ::testing::Values(Louvain_Usecase("test/datasets/karate.mtx", true, 100, 1) + //,Louvain_Usecase("test/datasets/smallworld.mtx", true, 100, 1) + )); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/louvain_test.cu b/cpp/tests/experimental/louvain_test.cu deleted file mode 100644 index 56fb2c109bf..00000000000 --- a/cpp/tests/experimental/louvain_test.cu +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governin_from_mtxg permissions and - * limitations under the License. 
- */ - -#include -#include - -#include -#include -#include - -#include -#include - -#include - -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -typedef struct Louvain_Usecase_t { - std::string graph_file_full_path{}; - bool test_weighted{false}; - - Louvain_Usecase_t(std::string const& graph_file_path, bool test_weighted) - : test_weighted(test_weighted) - { - if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { - graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; - } else { - graph_file_full_path = graph_file_path; - } - }; -} Louvain_Usecase; - -class Tests_Louvain : public ::testing::TestWithParam { - public: - Tests_Louvain() {} - static void SetupTestCase() {} - static void TearDownTestCase() {} - - virtual void SetUp() {} - virtual void TearDown() {} - - template - void run_current_test(Louvain_Usecase const& configuration) - { - raft::handle_t handle{}; - - std::cout << "read graph file: " << configuration.graph_file_full_path << std::endl; - - cugraph::experimental::graph_t graph(handle); - std::tie(graph, std::ignore) = - cugraph::test::read_graph_from_matrix_market_file( - handle, configuration.graph_file_full_path, configuration.test_weighted, false); - - auto graph_view = graph.view(); - - // "FIXME": remove this check once we drop support for Pascal - // - // Calling louvain on Pascal will throw an exception, we'll check that - // this is the behavior while we still support Pascal (device_prop.major < 7) - // - cudaDeviceProp device_prop; - CUDA_CHECK(cudaGetDeviceProperties(&device_prop, 0)); - - if (device_prop.major < 7) { - EXPECT_THROW(louvain(graph_view), cugraph::logic_error); - } else { - louvain(graph_view); - } - } - - template - void louvain(graph_t const& graph_view) - { - using vertex_t = typename graph_t::vertex_type; - using weight_t = typename graph_t::weight_type; - - raft::handle_t handle{}; - - rmm::device_vector clustering_v(graph_view.get_number_of_local_vertices()); - size_t level; - weight_t modularity; - - std::tie(level, modularity) = - cugraph::louvain(handle, graph_view, clustering_v.data().get(), size_t{100}, weight_t{1}); - - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - - std::cout << "level = " << level << std::endl; - std::cout << "modularity = " << modularity << std::endl; - } -}; - -// FIXME: add tests for type combinations -TEST_P(Tests_Louvain, CheckInt32Int32FloatFloat) -{ - run_current_test(GetParam()); -} - -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_Louvain, - ::testing::Values(Louvain_Usecase("test/datasets/karate.mtx", true) -#if 0 - , - Louvain_Usecase("test/datasets/web-Google.mtx", true), - Louvain_Usecase("test/datasets/ljournal-2008.mtx", true), - Louvain_Usecase("test/datasets/webbase-1M.mtx", true) -#endif - )); - -CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index e8f11acfbf4..79a86e1fc95 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -160,6 +160,11 @@ inline auto parse_test_options(int argc, char **argv) auto const cmd_opts = parse_test_options(argc, argv); \ auto const rmm_mode = cmd_opts["rmm_mode"].as(); \ auto resource = cugraph::test::create_memory_resource(rmm_mode); \ + \ + if (comm_rank != 0) { \ + auto &listeners = ::testing::UnitTest::GetInstance()->listeners(); \ + delete listeners.Release(listeners.default_result_printer()); \ + } \ 
rmm::mr::set_current_device_resource(resource.get()); \ auto ret = RUN_ALL_TESTS(); \ MPI_TRY(MPI_Finalize()); \ From 22e9e2bb6b7fb6599f3d7c5b1cc35683591fd6c6 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Tue, 30 Mar 2021 09:23:52 -0500 Subject: [PATCH 34/51] Add indirection and replace algorithms with new renumbering (#1484) Authors: - @Iroy30 Approvers: - Chuck Hastings (@ChuckHastings) - Alex Fender (@afender) - Seunghwa Kang (@seunghwak) URL: https://github.com/rapidsai/cugraph/pull/1484 --- python/cugraph/community/egonet_wrapper.pyx | 3 +- .../community/ktruss_subgraph_wrapper.pyx | 4 + .../community/subgraph_extraction_wrapper.pyx | 1 + python/cugraph/cores/k_core_wrapper.pyx | 4 + .../dask/centrality/katz_centrality.py | 12 +- python/cugraph/dask/common/input_utils.py | 14 +- python/cugraph/dask/community/louvain.py | 13 +- python/cugraph/dask/link_analysis/pagerank.py | 13 +- python/cugraph/dask/traversal/bfs.py | 13 +- python/cugraph/dask/traversal/sssp.py | 12 +- python/cugraph/structure/new_number_map.py | 317 -------- python/cugraph/structure/number_map.py | 686 ++++++------------ python/cugraph/structure/renumber_wrapper.pyx | 127 ++-- python/cugraph/tests/test_renumber.py | 140 ++-- 14 files changed, 417 insertions(+), 942 deletions(-) delete mode 100644 python/cugraph/structure/new_number_map.py diff --git a/python/cugraph/community/egonet_wrapper.pyx b/python/cugraph/community/egonet_wrapper.pyx index ff9f2b8b3de..ead41705628 100644 --- a/python/cugraph/community/egonet_wrapper.pyx +++ b/python/cugraph/community/egonet_wrapper.pyx @@ -33,7 +33,7 @@ def egonet(input_graph, vertices, radius=1): np.dtype("float32") : numberTypeEnum.floatType, np.dtype("double") : numberTypeEnum.doubleType} - [src, dst] = [input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']] + [src, dst] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32]) vertex_t = src.dtype edge_t = np.dtype("int32") weights = None @@ -54,6 +54,7 @@ def egonet(input_graph, vertices, radius=1): weight_t = np.dtype("float32") # Pointers for egonet + vertices = vertices.astype('int32') cdef uintptr_t c_source_vertex_ptr = vertices.__cuda_array_interface__['data'][0] n_subgraphs = vertices.size n_streams = 1 diff --git a/python/cugraph/community/ktruss_subgraph_wrapper.pyx b/python/cugraph/community/ktruss_subgraph_wrapper.pyx index 9f38b33d774..d3b7a38ba41 100644 --- a/python/cugraph/community/ktruss_subgraph_wrapper.pyx +++ b/python/cugraph/community/ktruss_subgraph_wrapper.pyx @@ -33,6 +33,10 @@ def ktruss_subgraph_double(input_graph, k, use_weights): def ktruss_subgraph(input_graph, k, use_weights): + [input_graph.edgelist.edgelist_df['src'], + input_graph.edgelist.edgelist_df['dst']] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], + input_graph.edgelist.edgelist_df['dst']], + [np.int32]) if graph_primtypes_wrapper.weight_type(input_graph) == np.float64 and use_weights: return ktruss_subgraph_double(input_graph, k, use_weights) else: diff --git a/python/cugraph/community/subgraph_extraction_wrapper.pyx b/python/cugraph/community/subgraph_extraction_wrapper.pyx index 31c5d2372f0..46dc5c07eaf 100644 --- a/python/cugraph/community/subgraph_extraction_wrapper.pyx +++ b/python/cugraph/community/subgraph_extraction_wrapper.pyx @@ -59,6 +59,7 @@ def subgraph(input_graph, vertices): if weights is not None: c_weights = 
weights.__cuda_array_interface__['data'][0] + [vertices] = graph_primtypes_wrapper.datatype_cast([vertices], [np.int32]) cdef uintptr_t c_vertices = vertices.__cuda_array_interface__['data'][0] if use_float: diff --git a/python/cugraph/cores/k_core_wrapper.pyx b/python/cugraph/cores/k_core_wrapper.pyx index a0ef99a8e8b..28bb191f4f4 100644 --- a/python/cugraph/cores/k_core_wrapper.pyx +++ b/python/cugraph/cores/k_core_wrapper.pyx @@ -49,6 +49,10 @@ def k_core(input_graph, k, core_number): """ Call k_core """ + [input_graph.edgelist.edgelist_df['src'], + input_graph.edgelist.edgelist_df['dst']] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], + input_graph.edgelist.edgelist_df['dst']], + [np.int32]) if graph_primtypes_wrapper.weight_type(input_graph) == np.float64: return k_core_double(input_graph, k, core_number) else: diff --git a/python/cugraph/dask/centrality/katz_centrality.py b/python/cugraph/dask/centrality/katz_centrality.py index e690e291928..a2f83a0b2a8 100644 --- a/python/cugraph/dask/centrality/katz_centrality.py +++ b/python/cugraph/dask/centrality/katz_centrality.py @@ -14,8 +14,8 @@ # from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import get_distributed_data -from cugraph.structure.shuffle import shuffle +from cugraph.dask.common.input_utils import (get_distributed_data, + get_vertex_partition_offsets) from cugraph.dask.centrality import\ mg_katz_centrality_wrapper as mg_katz_centrality import cugraph.comms.comms as Comms @@ -133,11 +133,9 @@ def katz_centrality(input_graph, client = default_client() input_graph.compute_renumber_edge_list(transposed=True) - (ddf, - num_verts, - partition_row_size, - partition_col_size, - vertex_partition_offsets) = shuffle(input_graph, transposed=True) + ddf = input_graph.edgelist.edgelist_df + vertex_partition_offsets = get_vertex_partition_offsets(input_graph) + num_verts = vertex_partition_offsets.iloc[-1] num_edges = len(ddf) data = get_distributed_data(ddf) diff --git a/python/cugraph/dask/common/input_utils.py b/python/cugraph/dask/common/input_utils.py index bbc914da502..0248f429a09 100644 --- a/python/cugraph/dask/common/input_utils.py +++ b/python/cugraph/dask/common/input_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
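The hunk below introduces get_vertex_partition_offsets, which replaces the shuffle-based bookkeeping in the dask algorithms: instead of reshuffling the edge list to recover partition sizes, the per-GPU vertex ranges are derived from the lengths of the distributed renumber map partitions. A minimal sketch of that idea using plain NumPy with hypothetical partition sizes (not the cuGraph implementation, which maps len() over a dask_cudf renumber map):

import numpy as np

# Hypothetical number of renumbered vertices owned by each GPU partition.
renumber_vertex_counts = np.array([4, 3, 5], dtype=np.int64)

# Offsets start at 0 and are the cumulative sum of the per-partition counts;
# partition i owns internal vertex ids in [offsets[i], offsets[i + 1]).
vertex_partition_offsets = np.concatenate(([0], np.cumsum(renumber_vertex_counts)))

# The total vertex count is the last offset, which is what the algorithms
# read as num_verts.
num_verts = vertex_partition_offsets[-1]
print(vertex_partition_offsets, num_verts)  # [ 0  4  7 12] 12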
@@ -217,3 +217,15 @@ def get_distributed_data(input_ddf): if data.worker_info is None and comms is not None: data.calculate_worker_and_rank_info(comms) return data + + +def get_vertex_partition_offsets(input_graph): + import cudf + renumber_vertex_count = input_graph.renumber_map.implementation.ddf.\ + map_partitions(len).compute() + renumber_vertex_cumsum = renumber_vertex_count.cumsum() + vertex_dtype = input_graph.edgelist.edgelist_df['src'].dtype + vertex_partition_offsets = cudf.Series([0], dtype=vertex_dtype) + vertex_partition_offsets = vertex_partition_offsets.append(cudf.Series( + renumber_vertex_cumsum, dtype=vertex_dtype)) + return vertex_partition_offsets diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py index 495061c0f81..c9af0f526c9 100644 --- a/python/cugraph/dask/community/louvain.py +++ b/python/cugraph/dask/community/louvain.py @@ -16,8 +16,8 @@ from dask.distributed import wait, default_client import cugraph.comms.comms as Comms -from cugraph.dask.common.input_utils import get_distributed_data -from cugraph.structure.shuffle import shuffle +from cugraph.dask.common.input_utils import (get_distributed_data, + get_vertex_partition_offsets) from cugraph.dask.community import louvain_wrapper as c_mg_louvain from cugraph.utilities.utils import is_cuda_version_less_than @@ -86,12 +86,9 @@ def louvain(input_graph, max_iter=100, resolution=1.0): input_graph.compute_renumber_edge_list(transposed=False) sorted_by_degree = True - (ddf, - num_verts, - partition_row_size, - partition_col_size, - vertex_partition_offsets) = shuffle(input_graph, transposed=False) - + ddf = input_graph.edgelist.edgelist_df + vertex_partition_offsets = get_vertex_partition_offsets(input_graph) + num_verts = vertex_partition_offsets.iloc[-1] num_edges = len(ddf) data = get_distributed_data(ddf) diff --git a/python/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/dask/link_analysis/pagerank.py index d8a76f1231e..bfaada85a6f 100644 --- a/python/cugraph/dask/link_analysis/pagerank.py +++ b/python/cugraph/dask/link_analysis/pagerank.py @@ -14,8 +14,8 @@ # from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import get_distributed_data -from cugraph.structure.shuffle import shuffle +from cugraph.dask.common.input_utils import (get_distributed_data, + get_vertex_partition_offsets) from cugraph.dask.link_analysis import mg_pagerank_wrapper as mg_pagerank import cugraph.comms.comms as Comms import dask_cudf @@ -124,11 +124,10 @@ def pagerank(input_graph, client = default_client() input_graph.compute_renumber_edge_list(transposed=True) - (ddf, - num_verts, - partition_row_size, - partition_col_size, - vertex_partition_offsets) = shuffle(input_graph, transposed=True) + + ddf = input_graph.edgelist.edgelist_df + vertex_partition_offsets = get_vertex_partition_offsets(input_graph) + num_verts = vertex_partition_offsets.iloc[-1] num_edges = len(ddf) data = get_distributed_data(ddf) diff --git a/python/cugraph/dask/traversal/bfs.py b/python/cugraph/dask/traversal/bfs.py index 51e0dc0de5d..d108730f665 100644 --- a/python/cugraph/dask/traversal/bfs.py +++ b/python/cugraph/dask/traversal/bfs.py @@ -14,8 +14,8 @@ # from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import get_distributed_data -from cugraph.structure.shuffle import shuffle +from cugraph.dask.common.input_utils import (get_distributed_data, + get_vertex_partition_offsets) from cugraph.dask.traversal import mg_bfs_wrapper as mg_bfs 
import cugraph.comms.comms as Comms import cudf @@ -91,11 +91,10 @@ def bfs(graph, client = default_client() graph.compute_renumber_edge_list(transposed=False) - (ddf, - num_verts, - partition_row_size, - partition_col_size, - vertex_partition_offsets) = shuffle(graph, transposed=False) + ddf = graph.edgelist.edgelist_df + vertex_partition_offsets = get_vertex_partition_offsets(graph) + num_verts = vertex_partition_offsets.iloc[-1] + num_edges = len(ddf) data = get_distributed_data(ddf) diff --git a/python/cugraph/dask/traversal/sssp.py b/python/cugraph/dask/traversal/sssp.py index 52f2b9b256c..32e7401023a 100644 --- a/python/cugraph/dask/traversal/sssp.py +++ b/python/cugraph/dask/traversal/sssp.py @@ -14,8 +14,8 @@ # from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import get_distributed_data -from cugraph.structure.shuffle import shuffle +from cugraph.dask.common.input_utils import (get_distributed_data, + get_vertex_partition_offsets) from cugraph.dask.traversal import mg_sssp_wrapper as mg_sssp import cugraph.comms.comms as Comms import cudf @@ -91,11 +91,9 @@ def sssp(graph, client = default_client() graph.compute_renumber_edge_list(transposed=False) - (ddf, - num_verts, - partition_row_size, - partition_col_size, - vertex_partition_offsets) = shuffle(graph, transposed=False) + ddf = graph.edgelist.edgelist_df + vertex_partition_offsets = get_vertex_partition_offsets(graph) + num_verts = vertex_partition_offsets.iloc[-1] num_edges = len(ddf) data = get_distributed_data(ddf) diff --git a/python/cugraph/structure/new_number_map.py b/python/cugraph/structure/new_number_map.py deleted file mode 100644 index f8a2164d2c4..00000000000 --- a/python/cugraph/structure/new_number_map.py +++ /dev/null @@ -1,317 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import get_distributed_data -from cugraph.structure import renumber_wrapper as c_renumber -import cugraph.comms as Comms -import dask_cudf -import numpy as np -import cudf -import cugraph.structure.number_map as legacy_number_map - - -def call_renumber(sID, - data, - num_edges, - is_mnmg, - store_transposed): - wid = Comms.get_worker_id(sID) - handle = Comms.get_handle(sID) - return c_renumber.renumber(data[0], - num_edges, - wid, - handle, - is_mnmg, - store_transposed) - - -class NumberMap: - - class SingleGPU: - def __init__(self, df, src_col_names, dst_col_names, id_type, - store_transposed): - self.col_names = NumberMap.compute_vals(src_col_names) - self.df = cudf.DataFrame() - self.id_type = id_type - self.store_transposed = store_transposed - self.numbered = False - - def to_internal_vertex_id(self, df, col_names): - tmp_df = df[col_names].rename( - columns=dict(zip(col_names, self.col_names)), copy=False - ) - index_name = NumberMap.generate_unused_column_name(df.columns) - tmp_df[index_name] = tmp_df.index - return ( - self.df.merge(tmp_df, on=self.col_names, how="right") - .sort_values(index_name) - .drop(columns=[index_name]) - .reset_index()["id"] - ) - - def from_internal_vertex_id( - self, df, internal_column_name, external_column_names - ): - tmp_df = self.df.merge( - df, - right_on=internal_column_name, - left_on="id", - how="right", - ) - if internal_column_name != "id": - tmp_df = tmp_df.drop(columns=["id"]) - if external_column_names is None: - return tmp_df - else: - return tmp_df.rename( - columns=dict(zip(self.col_names, external_column_names)), - copy=False, - ) - - class MultiGPU: - def __init__( - self, ddf, src_col_names, dst_col_names, id_type, store_transposed - ): - self.col_names = NumberMap.compute_vals(src_col_names) - self.val_types = NumberMap.compute_vals_types(ddf, src_col_names) - self.val_types["count"] = np.int32 - self.id_type = id_type - self.store_transposed = store_transposed - self.numbered = False - - def to_internal_vertex_id(self, ddf, col_names): - return self.ddf.merge( - ddf, - right_on=col_names, - left_on=self.col_names, - how="right", - )["global_id"] - - def from_internal_vertex_id( - self, df, internal_column_name, external_column_names - ): - tmp_df = self.ddf.merge( - df, - right_on=internal_column_name, - left_on="global_id", - how="right" - ).map_partitions(lambda df: df.drop(columns="global_id")) - - if external_column_names is None: - return tmp_df - else: - return tmp_df.map_partitions( - lambda df: - df.rename( - columns=dict( - zip(self.col_names, external_column_names) - ), - copy=False - ) - ) - - def __init__(self, id_type=np.int32): - self.implementation = None - self.id_type = id_type - - def compute_vals_types(df, column_names): - """ - Helper function to compute internal column names and types - """ - return { - str(i): df[column_names[i]].dtype for i in range(len(column_names)) - } - - def generate_unused_column_name(column_names): - """ - Helper function to generate an unused column name - """ - name = 'x' - while name in column_names: - name = name + "x" - - return name - - def compute_vals(column_names): - """ - Helper function to compute internal column names based on external - column names - """ - return [str(i) for i in range(len(column_names))] - - def renumber(df, src_col_names, dst_col_names, preserve_order=False, - store_transposed=False): - - if isinstance(src_col_names, list): - renumber_type = 'legacy' - # elif 
isinstance(df[src_col_names].dtype, string): - # renumber_type = 'legacy' - else: - renumber_type = 'experimental' - - if renumber_type == 'legacy': - renumber_map, renumbered_df = legacy_number_map.renumber( - df, - src_col_names, - dst_col_names, - preserve_order, - store_transposed) - # Add shuffling once algorithms are switched to new renumber - # (ddf, - # num_verts, - # partition_row_size, - # partition_col_size, - # vertex_partition_offsets) = shuffle(input_graph, transposed=True) - return renumber_map, renumbered_df - - renumber_map = NumberMap() - if not isinstance(src_col_names, list): - src_col_names = [src_col_names] - dst_col_names = [dst_col_names] - if type(df) is cudf.DataFrame: - renumber_map.implementation = NumberMap.SingleGPU( - df, src_col_names, dst_col_names, renumber_map.id_type, - store_transposed - ) - elif type(df) is dask_cudf.DataFrame: - renumber_map.implementation = NumberMap.MultiGPU( - df, src_col_names, dst_col_names, renumber_map.id_type, - store_transposed - ) - else: - raise Exception("df must be cudf.DataFrame or dask_cudf.DataFrame") - - num_edges = len(df) - - if isinstance(df, dask_cudf.DataFrame): - is_mnmg = True - else: - is_mnmg = False - - if is_mnmg: - client = default_client() - data = get_distributed_data(df) - result = [(client.submit(call_renumber, - Comms.get_session_id(), - wf[1], - num_edges, - is_mnmg, - store_transposed, - workers=[wf[0]]), wf[0]) - for idx, wf in enumerate(data.worker_to_parts.items())] - wait(result) - - def get_renumber_map(data): - return data[0] - - def get_renumbered_df(data): - return data[1] - - renumbering_map = dask_cudf.from_delayed( - [client.submit(get_renumber_map, - data, - workers=[wf]) - for (data, wf) in result]) - renumbered_df = dask_cudf.from_delayed( - [client.submit(get_renumbered_df, - data, - workers=[wf]) - for (data, wf) in result]) - - renumber_map.implementation.ddf = renumbering_map - renumber_map.implementation.numbered = True - - return renumbered_df, renumber_map - else: - renumbering_map, renumbered_df = c_renumber.renumber( - df, - num_edges, - 0, - Comms.get_default_handle(), - is_mnmg, - store_transposed) - renumber_map.implementation.df = renumbering_map - renumber_map.implementation.numbered = True - return renumbered_df, renumber_map - - def unrenumber(self, df, column_name, preserve_order=False): - """ - Given a DataFrame containing internal vertex ids in the identified - column, replace this with external vertex ids. If the renumbering - is from a single column, the output dataframe will use the same - name for the external vertex identifiers. If the renumbering is from - a multi-column input, the output columns will be labeled 0 through - n-1 with a suffix of _column_name. - Note that this function does not guarantee order or partitioning in - multi-GPU mode. - Parameters - ---------- - df: cudf.DataFrame or dask_cudf.DataFrame - A DataFrame containing internal vertex identifiers that will be - converted into external vertex identifiers. - column_name: string - Name of the column containing the internal vertex id. - preserve_order: (optional) bool - If True, preserve the ourder of the rows in the output - DataFrame to match the input DataFrame - Returns - --------- - df : cudf.DataFrame or dask_cudf.DataFrame - The original DataFrame columns exist unmodified. The external - vertex identifiers are added to the DataFrame, the internal - vertex identifier column is removed from the dataframe. 
- Examples - -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> - >>> df, number_map = NumberMap.renumber(df, '0', '1') - >>> - >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(df, 'src', 'dst') - >>> - >>> pr = cugraph.pagerank(G, alpha = 0.85, max_iter = 500, - >>> tol = 1.0e-05) - >>> - >>> pr = number_map.unrenumber(pr, 'vertex') - >>> - """ - if len(self.col_names) == 1: - # Output will be renamed to match input - mapping = {"0": column_name} - else: - # Output will be renamed to ${i}_${column_name} - mapping = {} - for nm in self.col_names: - mapping[nm] = nm + "_" + column_name - - if preserve_order: - index_name = NumberMap.generate_unused_column_name(df) - df[index_name] = df.index - - df = self.from_internal_vertex_id(df, column_name, drop=True) - - if preserve_order: - df = df.sort_values( - index_name - ).drop(columns=index_name).reset_index(drop=True) - - if type(df) is dask_cudf.DataFrame: - return df.map_partitions( - lambda df: df.rename(columns=mapping, copy=False) - ) - else: - return df.rename(columns=mapping, copy=False) diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index deb2b9f4114..5f801eb0d90 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -1,4 +1,5 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -10,100 +11,45 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# -import cudf +from dask.distributed import wait, default_client +from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.structure import renumber_wrapper as c_renumber +import cugraph.comms.comms as Comms import dask_cudf import numpy as np -import bisect +import cudf + + +def call_renumber(sID, + data, + num_edges, + is_mnmg, + store_transposed): + wid = Comms.get_worker_id(sID) + handle = Comms.get_handle(sID) + return c_renumber.renumber(data[0], + num_edges, + wid, + handle, + is_mnmg, + store_transposed) class NumberMap: - """ - Class used to translate external vertex ids to internal vertex ids - in the cuGraph framework. - - Internal vertex ids are assigned by hashing the external vertex ids - into a structure to eliminate duplicates, and the resulting list - of unique vertices are assigned integers from [0, V) where V is - the number of unique vertices. - - In Single GPU mode, internal vertex ids are constructed using - cudf functions, with a cudf.DataFrame containing the mapping - from external vertex identifiers and internal vertex identifiers - allowing for mapping vertex identifiers in either direction. In - this mode, the order of the output from the mapping functions is - non-deterministic. cudf makes no guarantees about order. If - matching the input order is required set the preserve_order - to True. - - In Multi GPU mode, internal vertex ids are constucted using - dask_cudf functions, with a dask_cudf.DataFrame containing - the mapping from external vertex identifiers and internal - vertex identifiers allowing for mapping vertex identifiers - in either direction. 
In this mode, the partitioning of - the number_map and the output from any of the mapping functions - are non-deterministic. dask_cudf makes no guarantees about the - partitioning or order of the output. As of this release, - there is no mechanism for controlling that, this will be - addressed at some point. - """ class SingleGPU: def __init__(self, df, src_col_names, dst_col_names, id_type, store_transposed): self.col_names = NumberMap.compute_vals(src_col_names) - self.df = cudf.DataFrame() + self.src_col_names = src_col_names + self.dst_col_names = dst_col_names + self.df = df self.id_type = id_type self.store_transposed = store_transposed - - source_count = 0 - dest_count = 0 - - if store_transposed: - dest_count = 1 - else: - source_count = 1 - - tmp = ( - df[src_col_names] - .assign(count=source_count) - .groupby(src_col_names) - .sum() - .reset_index() - .rename( - columns=dict(zip(src_col_names, self.col_names)), - copy=False, - ) - ) - - if dst_col_names is not None: - tmp_dst = ( - df[dst_col_names] - .assign(count=dest_count) - .groupby(dst_col_names) - .sum() - .reset_index() - ) - for newname, oldname in zip(self.col_names, dst_col_names): - self.df[newname] = tmp[newname].append(tmp_dst[oldname]) - self.df['count'] = tmp['count'].append(tmp_dst['count']) - else: - for newname in self.col_names: - self.df[newname] = tmp[newname] - self.df['count'] = tmp['count'] - self.numbered = False - def compute(self): - if not self.numbered: - tmp = self.df.groupby(self.col_names).sum().sort_values( - 'count', ascending=False - ).reset_index().drop(columns='count') - - tmp["id"] = tmp.index.astype(self.id_type) - self.df = tmp - self.numbered = True - def to_internal_vertex_id(self, df, col_names): tmp_df = df[col_names].rename( columns=dict(zip(col_names, self.col_names)), copy=False @@ -117,6 +63,25 @@ def to_internal_vertex_id(self, df, col_names): .reset_index()["id"] ) + def from_internal_vertex_id( + self, df, internal_column_name, external_column_names + ): + tmp_df = self.df.merge( + df, + right_on=internal_column_name, + left_on="id", + how="right", + ) + if internal_column_name != "id": + tmp_df = tmp_df.drop(columns=["id"]) + if external_column_names is None: + return tmp_df + else: + return tmp_df.rename( + columns=dict(zip(self.col_names, external_column_names)), + copy=False, + ) + def add_internal_vertex_id(self, df, id_column_name, col_names, drop, preserve_order): ret = None @@ -162,76 +127,39 @@ def add_internal_vertex_id(self, df, id_column_name, col_names, return ret - def from_internal_vertex_id( - self, df, internal_column_name, external_column_names - ): - tmp_df = self.df.merge( - df, - right_on=internal_column_name, - left_on="id", - how="right", - ) - if internal_column_name != "id": - tmp_df = tmp_df.drop(columns=["id"]) - if external_column_names is None: - return tmp_df - else: - return tmp_df.rename( - columns=dict(zip(self.col_names, external_column_names)), - copy=False, - ) - - class MultiGPU: - def extract_vertices( - df, src_col_names, dst_col_names, - internal_col_names, store_transposed - ): - source_count = 0 - dest_count = 0 - - if store_transposed: - dest_count = 1 - else: - source_count = 1 + def indirection_map(self, df, src_col_names, dst_col_names): + tmp_df = cudf.DataFrame() - s = ( + tmp = ( df[src_col_names] - .assign(count=source_count) .groupby(src_col_names) - .sum() + .count() .reset_index() .rename( - columns=dict(zip(src_col_names, internal_col_names)), + columns=dict(zip(src_col_names, self.col_names)), copy=False, ) ) - d = None if 
dst_col_names is not None: - d = ( + tmp_dst = ( df[dst_col_names] - .assign(count=dest_count) .groupby(dst_col_names) - .sum() + .count() .reset_index() - .rename( - columns=dict(zip(dst_col_names, internal_col_names)), - copy=False, - ) ) + for newname, oldname in zip(self.col_names, dst_col_names): + tmp_df[newname] = tmp[newname].append(tmp_dst[oldname]) + else: + for newname in self.col_names: + tmp_df[newname] = tmp[newname] - reply = cudf.DataFrame() - - for i in internal_col_names: - if d is None: - reply[i] = s[i] - else: - reply[i] = s[i].append(d[i]) - - reply['count'] = s['count'].append(d['count']) - - return reply + tmp_df = tmp_df.groupby(self.col_names).count().reset_index() + tmp_df["id"] = tmp_df.index.astype(self.id_type) + self.df = tmp_df + return tmp_df + class MultiGPU: def __init__( self, ddf, src_col_names, dst_col_names, id_type, store_transposed ): @@ -239,110 +167,10 @@ def __init__( self.val_types = NumberMap.compute_vals_types(ddf, src_col_names) self.val_types["count"] = np.int32 self.id_type = id_type + self.ddf = ddf self.store_transposed = store_transposed - self.ddf = ddf.map_partitions( - NumberMap.MultiGPU.extract_vertices, - src_col_names, - dst_col_names, - self.col_names, - store_transposed, - meta=self.val_types, - ) self.numbered = False - # Function to compute partitions based on known divisions of the - # hash value - def compute_partition(df, divisions): - sample = df.index[0] - partition_id = bisect.bisect_right(divisions, sample) - 1 - return df.assign(partition=partition_id) - - def assign_internal_identifiers_kernel( - local_id, partition, global_id, base_addresses - ): - for i in range(len(local_id)): - global_id[i] = local_id[i] + base_addresses[partition[i]] - - def assign_internal_identifiers(df, base_addresses, id_type): - df = df.assign(local_id=df.index.astype(np.int64)) - df = df.apply_rows( - NumberMap.MultiGPU.assign_internal_identifiers_kernel, - incols=["local_id", "partition"], - outcols={"global_id": id_type}, - kwargs={"base_addresses": base_addresses}, - ) - - return df.drop(columns=["local_id", "hash", "partition"]) - - def assign_global_id(self, ddf, base_addresses, val_types): - val_types["global_id"] = self.id_type - del val_types["hash"] - del val_types["partition"] - - ddf = ddf.map_partitions( - lambda df: NumberMap.MultiGPU.assign_internal_identifiers( - df, base_addresses, self.id_type - ), - meta=val_types, - ) - return ddf - - def compute(self): - if not self.numbered: - val_types = self.val_types - val_types["hash"] = np.int32 - - vertices = self.ddf.map_partitions( - lambda df: df.assign(hash=df.hash_columns(self.col_names)), - meta=val_types, - ) - - # Redistribute the ddf based on the hash values - rehashed = vertices.set_index("hash", drop=False) - - # Compute the local partition id (obsolete once - # https://github.com/dask/dask/issues/3707 is completed) - val_types["partition"] = np.int32 - - rehashed_with_partition_id = rehashed.map_partitions( - NumberMap.MultiGPU.compute_partition, - rehashed.divisions, - meta=val_types, - ) - - val_types.pop('count') - - numbering_map = rehashed_with_partition_id.map_partitions( - lambda df: df.groupby( - self.col_names + ["hash", "partition"] - ).sum() - .sort_values('count', ascending=False) - .reset_index() - .drop(columns='count'), - meta=val_types - ) - - # - # Compute base address for each partition - # - counts = numbering_map.map_partitions( - lambda df: df.groupby("partition").count() - ).compute()["hash"].to_pandas() - base_addresses = np.zeros(len(counts) + 1, 
self.id_type) - - for i in range(len(counts)): - base_addresses[i + 1] = base_addresses[i] + counts[i] - - # - # Update each partition with the base address - # - numbering_map = self.assign_global_id( - numbering_map, cudf.Series(base_addresses), val_types - ) - - self.ddf = numbering_map - self.numbered = True - def to_internal_vertex_id(self, ddf, col_names): return self.ddf.merge( ddf, @@ -351,6 +179,29 @@ def to_internal_vertex_id(self, ddf, col_names): how="right", )["global_id"] + def from_internal_vertex_id( + self, df, internal_column_name, external_column_names + ): + tmp_df = self.ddf.merge( + df, + right_on=internal_column_name, + left_on="global_id", + how="right" + ).map_partitions(lambda df: df.drop(columns="global_id")) + + if external_column_names is None: + return tmp_df + else: + return tmp_df.map_partitions( + lambda df: + df.rename( + columns=dict( + zip(self.col_names, external_column_names) + ), + copy=False + ) + ) + def add_internal_vertex_id(self, ddf, id_column_name, col_names, drop, preserve_order): # At the moment, preserve_order cannot be done on @@ -385,39 +236,50 @@ def add_internal_vertex_id(self, ddf, id_column_name, col_names, drop, return ret - def from_internal_vertex_id( - self, df, internal_column_name, external_column_names - ): - tmp_df = self.ddf.merge( - df, - right_on=internal_column_name, - left_on="global_id", - how="right" - ).map_partitions(lambda df: df.drop(columns="global_id")) + def indirection_map(self, ddf, src_col_names, dst_col_names): - if external_column_names is None: - return tmp_df - else: - return tmp_df.map_partitions( - lambda df: - df.rename( - columns=dict( - zip(self.col_names, external_column_names) - ), - copy=False - ) + tmp = ( + ddf[src_col_names] + .groupby(src_col_names) + .count() + .reset_index() + .rename( + columns=dict(zip(src_col_names, self.col_names)), ) + ) + + if dst_col_names is not None: + tmp_dst = ( + ddf[dst_col_names] + .groupby(dst_col_names) + .count() + .reset_index() + ) + for i, (newname, oldname) in enumerate(zip(self.col_names, + dst_col_names)): + if i == 0: + tmp_df = tmp[newname].append(tmp_dst[oldname]).\ + to_frame(name=newname) + else: + tmp_df[newname] = tmp[newname].append(tmp_dst[oldname]) + print(tmp_df.columns) + else: + for newname in self.col_names: + tmp_df[newname] = tmp[newname] + tmp_ddf = tmp_df.groupby(self.col_names).count().reset_index() + + # Set global index + tmp_ddf = tmp_ddf.assign(idx=1) + tmp_ddf['global_id'] = tmp_ddf.idx.cumsum() - 1 + tmp_ddf = tmp_ddf.drop(columns='idx') + + self.ddf = tmp_ddf + return tmp_ddf def __init__(self, id_type=np.int32): self.implementation = None self.id_type = id_type - def aggregate_count_and_partition(df): - d = {} - d['count'] = df['count'].sum() - d['partition'] = df['partition'].min() - return cudf.Series(d, index=['count', 'partition']) - def compute_vals_types(df, column_names): """ Helper function to compute internal column names and types @@ -443,125 +305,19 @@ def compute_vals(column_names): """ return [str(i) for i in range(len(column_names))] - def from_dataframe( - self, df, src_col_names, dst_col_names=None, store_transposed=False - ): - """ - Populate the numbering map with vertices from the specified - columns of the provided DataFrame. - - Parameters - ---------- - df : cudf.DataFrame or dask_cudf.DataFrame - Contains a list of external vertex identifiers that will be - numbered by the NumberMap class. 
- src_col_names: list of strings - This list of 1 or more strings contain the names - of the columns that uniquely identify an external - vertex identifier for source vertices - dst_col_names: list of strings - This list of 1 or more strings contain the names - of the columns that uniquely identify an external - vertex identifier for destination vertices - store_transposed : bool - Identify how the graph adjacency will be used. - If True, the graph will be organized by destination. - If False, the graph will be organized by source - - """ - if self.implementation is not None: - raise Exception("NumberMap is already populated") - - if dst_col_names is not None and len(src_col_names) != len( - dst_col_names - ): - raise Exception( - "src_col_names must have same length as dst_col_names" - ) - - if type(df) is cudf.DataFrame: - self.implementation = NumberMap.SingleGPU( - df, src_col_names, dst_col_names, self.id_type, - store_transposed - ) - elif type(df) is dask_cudf.DataFrame: - self.implementation = NumberMap.MultiGPU( - df, src_col_names, dst_col_names, self.id_type, - store_transposed - ) - else: - raise Exception("df must be cudf.DataFrame or dask_cudf.DataFrame") - - self.implementation.compute() - - def from_series(self, src_series, dst_series=None, store_transposed=False): - """ - Populate the numbering map with vertices from the specified - pair of series objects, one for the source and one for - the destination - - Parameters - ---------- - src_series: cudf.Series or dask_cudf.Series - Contains a list of external vertex identifiers that will be - numbered by the NumberMap class. - dst_series: cudf.Series or dask_cudf.Series - Contains a list of external vertex identifiers that will be - numbered by the NumberMap class. - store_transposed : bool - Identify how the graph adjacency will be used. - If True, the graph will be organized by destination. 
- If False, the graph will be organized by source - """ - if self.implementation is not None: - raise Exception("NumberMap is already populated") - - if dst_series is not None and type(src_series) != type(dst_series): - raise Exception("src_series and dst_series must have same type") - - if type(src_series) is cudf.Series: - dst_series_list = None - df = cudf.DataFrame() - df["s"] = src_series - if dst_series is not None: - df["d"] = dst_series - dst_series_list = ["d"] - self.implementation = NumberMap.SingleGPU( - df, ["s"], dst_series_list, self.id_type, store_transposed - ) - elif type(src_series) is dask_cudf.Series: - dst_series_list = None - df = dask_cudf.DataFrame() - df["s"] = src_series - if dst_series is not None: - df["d"] = dst_series - dst_series_list = ["d"] - self.implementation = NumberMap.MultiGPU( - df, ["s"], dst_series_list, self.id_type, store_transposed - ) - else: - raise Exception( - "src_series must be cudf.Series or " "dask_cudf.Series" - ) - - self.implementation.compute() - def to_internal_vertex_id(self, df, col_names=None): """ Given a collection of external vertex ids, return the internal vertex ids - Parameters ---------- df: cudf.DataFrame, cudf.Series, dask_cudf.DataFrame, dask_cudf.Series Contains a list of external vertex identifiers that will be converted into internal vertex identifiers - col_names: (optional) list of strings This list of 1 or more strings contain the names of the columns that uniquely identify an external vertex identifier - Returns --------- vertex_ids : cudf.Series or dask_cudf.Series @@ -569,7 +325,6 @@ def to_internal_vertex_id(self, df, col_names=None): does not guarantee order or partitioning (in the case of dask_cudf) of vertex ids. If order matters use add_internal_vertex_id - """ tmp_df = None tmp_col_names = None @@ -600,34 +355,27 @@ def add_internal_vertex_id( """ Given a collection of external vertex ids, return the internal vertex ids combined with the input data. - If a series-type input is provided then the series will be in a column named '0'. Otherwise the input column names in the DataFrame will be preserved. - Parameters ---------- df: cudf.DataFrame, cudf.Series, dask_cudf.DataFrame, dask_cudf.Series Contains a list of external vertex identifiers that will be converted into internal vertex identifiers - id_column_name: (optional) string The name to be applied to the column containing the id (defaults to 'id') - col_names: (optional) list of strings This list of 1 or more strings contain the names of the columns that uniquely identify an external vertex identifier - drop: (optional) boolean If True, drop the column names specified in col_names from the returned DataFrame. Defaults to False. - preserve_order: (optional) boolean If True, do extra sorting work to preserve the order of the input DataFrame. Defaults to False. - Returns --------- df : cudf.DataFrame or dask_cudf.DataFrame @@ -635,7 +383,6 @@ def add_internal_vertex_id( with an additional column containing the internal vertex id. Note that there is no guarantee of the order or partitioning of elements in the returned DataFrame. - """ tmp_df = None tmp_col_names = None @@ -671,7 +418,6 @@ def from_internal_vertex_id( """ Given a collection of internal vertex ids, return a DataFrame of the external vertex ids - Parameters ---------- df: cudf.DataFrame, cudf.Series, dask_cudf.DataFrame, dask_cudf.Series @@ -681,20 +427,16 @@ def from_internal_vertex_id( in a column labeled 'id'. 
If df is a dataframe type object then internal_column_name should identify which column corresponds the the internal vertex id that should be converted - internal_column_name: (optional) string Name of the column containing the internal vertex id. If df is a series then this parameter is ignored. If df is a DataFrame this parameter is required. - external_column_names: (optional) string or list of strings Name of the columns that define an external vertex id. If not specified, columns will be labeled '0', '1,', ..., 'n-1' - drop: (optional) boolean If True the internal column name will be dropped from the DataFrame. Defaults to False. - Returns --------- df : cudf.DataFrame or dask_cudf.DataFrame @@ -727,107 +469,117 @@ def from_internal_vertex_id( return output_df - def column_names(self): - """ - Return the list of internal column names - - Returns - ---------- - List of column names ('0', '1', ..., 'n-1') - """ - return self.implementation.col_names - def renumber(df, src_col_names, dst_col_names, preserve_order=False, store_transposed=False): - """ - Given a single GPU or distributed DataFrame, use src_col_names and - dst_col_names to identify the source vertex identifiers and destination - vertex identifiers, respectively. - - Internal vertex identifiers will be created, numbering vertices as - integers starting from 0. - - The function will return a DataFrame containing the original dataframe - contents with a new column labeled 'src' containing the renumbered - source vertices and a new column labeled 'dst' containing the - renumbered dest vertices, along with a NumberMap object that contains - the number map for the numbering that was used. - - Note that this function does not guarantee order in single GPU mode, - and does not guarantee order or partitioning in multi-GPU mode. If you - wish to preserve ordering, add an index column to df and sort the - return by that index column. - - Parameters - ---------- - df: cudf.DataFrame or dask_cudf.DataFrame - Contains a list of external vertex identifiers that will be - numbered by the NumberMap class. - src_col_names: string or list of strings - This list of 1 or more strings contain the names - of the columns that uniquely identify an external - vertex identifier for source vertices - dst_col_names: string or list of strings - This list of 1 or more strings contain the names - of the columns that uniquely identify an external - vertex identifier for destination vertices - store_transposed : bool - Identify how the graph adjacency will be used. - If True, the graph will be organized by destination. - If False, the graph will be organized by source - - Returns - --------- - df : cudf.DataFrame or dask_cudf.DataFrame - The original DataFrame columns exist unmodified. Columns - are added to the DataFrame to identify the external vertex - identifiers. If external_columns is specified, these names - are used as the names of the output columns. If external_columns - is not specifed the columns are labeled '0', ... 'n-1' based on - the number of columns identifying the external vertex identifiers. - - number_map : NumberMap - The number map object object that retains the mapping between - internal vertex identifiers and external vertex identifiers. 
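The added code that follows dispatches NumberMap.renumber between two paths: multi-column or non-integer vertex identifiers take the 'legacy' path, which first builds an indirection map (external id to dense integer id) and only then hands dense integers to the new renumbering, while single-column int32/int64 input goes straight to the 'experimental' path. A rough sketch of the indirection step using pandas and hypothetical string vertex ids (the real implementation operates on cudf/dask_cudf frames):

import pandas as pd

# Hypothetical edge list with non-integer (string) vertex identifiers.
edges = pd.DataFrame({"src": ["a", "b", "a", "c"],
                      "dst": ["b", "c", "c", "a"]})

# Indirection map: every distinct external id gets a dense integer id.
unique_ids = pd.concat([edges["src"], edges["dst"]]).drop_duplicates()
indirection = pd.DataFrame({"0": unique_ids.values})
indirection["id"] = indirection.index.astype("int32")

# Replace the external ids in the edge list with the dense integer ids;
# these integers are what the C++ renumbering (and the algorithms) then see.
id_of = dict(zip(indirection["0"], indirection["id"]))
edges["src"] = edges["src"].map(id_of).astype("int32")
edges["dst"] = edges["dst"].map(id_of).astype("int32")
print(edges)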
+ if isinstance(src_col_names, list): + renumber_type = 'legacy' + elif not (df[src_col_names].dtype == np.int32 or + df[src_col_names].dtype == np.int64): + renumber_type = 'legacy' + else: + renumber_type = 'experimental' + df = df.rename(columns={src_col_names: "src", + dst_col_names: "dst"}) - Examples - -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> - >>> df, number_map = NumberMap.renumber(df, '0', '1') - >>> - >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(df, 'src', 'dst') - """ renumber_map = NumberMap() - - if isinstance(src_col_names, list): - renumber_map.from_dataframe(df, src_col_names, dst_col_names) - df = renumber_map.add_internal_vertex_id( - df, "src", src_col_names, drop=True, - preserve_order=preserve_order + if not isinstance(src_col_names, list): + src_col_names = [src_col_names] + dst_col_names = [dst_col_names] + if type(df) is cudf.DataFrame: + renumber_map.implementation = NumberMap.SingleGPU( + df, src_col_names, dst_col_names, renumber_map.id_type, + store_transposed ) - df = renumber_map.add_internal_vertex_id( - df, "dst", dst_col_names, drop=True, - preserve_order=preserve_order + elif type(df) is dask_cudf.DataFrame: + renumber_map.implementation = NumberMap.MultiGPU( + df, src_col_names, dst_col_names, renumber_map.id_type, + store_transposed ) else: - renumber_map.from_dataframe(df, [src_col_names], [dst_col_names]) + raise Exception("df must be cudf.DataFrame or dask_cudf.DataFrame") + + if renumber_type == 'legacy': + indirection_map = renumber_map.implementation.\ + indirection_map(df, + src_col_names, + dst_col_names) df = renumber_map.add_internal_vertex_id( df, "src", src_col_names, drop=True, preserve_order=preserve_order ) - df = renumber_map.add_internal_vertex_id( df, "dst", dst_col_names, drop=True, preserve_order=preserve_order ) - if type(df) is dask_cudf.DataFrame: - df = df.persist() + num_edges = len(df) + + if isinstance(df, dask_cudf.DataFrame): + is_mnmg = True + else: + is_mnmg = False + + if is_mnmg: + client = default_client() + data = get_distributed_data(df) + result = [(client.submit(call_renumber, + Comms.get_session_id(), + wf[1], + num_edges, + is_mnmg, + store_transposed, + workers=[wf[0]]), wf[0]) + for idx, wf in enumerate(data.worker_to_parts.items())] + wait(result) + + def get_renumber_map(data): + return data[0] + + def get_renumbered_df(data): + return data[1] + + renumbering_map = dask_cudf.from_delayed( + [client.submit(get_renumber_map, + data, + workers=[wf]) + for (data, wf) in result]) + renumbered_df = dask_cudf.from_delayed( + [client.submit(get_renumbered_df, + data, + workers=[wf]) + for (data, wf) in result]) + if renumber_type == 'legacy': + renumber_map.implementation.ddf = indirection_map.merge( + renumbering_map, + right_on='original_ids', left_on='global_id', + how='right').\ + drop(columns=['global_id', 'original_ids'])\ + .rename(columns={'new_ids': 'global_id'}) + else: + renumber_map.implementation.ddf = renumbering_map.rename( + columns={'original_ids': '0', 'new_ids': 'global_id'}) + renumber_map.implementation.numbered = True + return renumbered_df, renumber_map - return df, renumber_map + else: + renumbering_map, renumbered_df = c_renumber.renumber( + df, + num_edges, + 0, + Comms.get_default_handle(), + is_mnmg, + store_transposed) + if renumber_type == 'legacy': + renumber_map.implementation.df = indirection_map.\ + merge(renumbering_map, + right_on='original_ids', left_on='id').\ + drop(columns=['id', 
'original_ids'])\ + .rename(columns={'new_ids': 'id'}, copy=False) + else: + renumber_map.implementation.df = renumbering_map.rename( + columns={'original_ids': '0', 'new_ids': 'id'}, copy=False) + renumber_map.implementation.numbered = True + return renumbered_df, renumber_map def unrenumber(self, df, column_name, preserve_order=False): """ @@ -837,30 +589,24 @@ def unrenumber(self, df, column_name, preserve_order=False): name for the external vertex identifiers. If the renumbering is from a multi-column input, the output columns will be labeled 0 through n-1 with a suffix of _column_name. - Note that this function does not guarantee order or partitioning in multi-GPU mode. - Parameters ---------- df: cudf.DataFrame or dask_cudf.DataFrame A DataFrame containing internal vertex identifiers that will be converted into external vertex identifiers. - column_name: string Name of the column containing the internal vertex id. - preserve_order: (optional) bool If True, preserve the ourder of the rows in the output DataFrame to match the input DataFrame - Returns --------- df : cudf.DataFrame or dask_cudf.DataFrame The original DataFrame columns exist unmodified. The external vertex identifiers are added to the DataFrame, the internal vertex identifier column is removed from the dataframe. - Examples -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', diff --git a/python/cugraph/structure/renumber_wrapper.pyx b/python/cugraph/structure/renumber_wrapper.pyx index 302fcfe583b..682c6b32a0f 100644 --- a/python/cugraph/structure/renumber_wrapper.pyx +++ b/python/cugraph/structure/renumber_wrapper.pyx @@ -43,8 +43,8 @@ cdef renumber_helper(shuffled_vertices_t* ptr_maj_min_w, vertex_t, weights): shuffled_minor_series = cudf.Series(data=shuffled_minor_buffer, dtype=vertex_t) shuffled_df = cudf.DataFrame() - shuffled_df['src']=shuffled_major_series - shuffled_df['dst']=shuffled_minor_series + shuffled_df['major_vertices']=shuffled_major_series + shuffled_df['minor_vertices']=shuffled_minor_series if weights is not None: weight_t = weights.dtype @@ -53,7 +53,7 @@ cdef renumber_helper(shuffled_vertices_t* ptr_maj_min_w, vertex_t, weights): shuffled_weights_series = cudf.Series(data=shuffled_weights_buffer, dtype=weight_t) - shuffled_df['weights']= shuffled_weights_series + shuffled_df['value']= shuffled_weights_series return shuffled_df @@ -84,7 +84,7 @@ def renumber(input_df, # maybe use cpdef ? if num_global_edges > (2**31 - 1): edge_t = np.dtype("int64") else: - edge_t = np.dtype("int32") + edge_t = vertex_t if "value" in input_df.columns: weights = input_df['value'] weight_t = weights.dtype @@ -150,15 +150,19 @@ def renumber(input_df, # maybe use cpdef ? 
num_partition_edges, is_hyper_partitioned).release()) shuffled_df = renumber_helper(ptr_shuffled_32_32.get(), vertex_t, weights) + major_vertices = shuffled_df['major_vertices'] + minor_vertices = shuffled_df['minor_vertices'] + num_partition_edges = len(shuffled_df) + if not transposed: + major = 'src'; minor = 'dst' + else: + major = 'dst'; minor = 'src' + shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) else: - shuffled_df = input_df - - shuffled_src = shuffled_df['src'] - shuffled_dst = shuffled_df['dst'] - num_partition_edges = len(shuffled_df) + shuffled_df = input_df - shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] - shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + shuffled_major = major_vertices.__cuda_array_interface__['data'][0] + shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] ptr_renum_quad_32_32.reset(call_renumber[int, int](deref(handle_ptr), shuffled_major, shuffled_minor, @@ -209,15 +213,19 @@ def renumber(input_df, # maybe use cpdef ? is_hyper_partitioned).release()) shuffled_df = renumber_helper(ptr_shuffled_32_64.get(), vertex_t, weights) + major_vertices = shuffled_df['major_vertices'] + minor_vertices = shuffled_df['minor_vertices'] + num_partition_edges = len(shuffled_df) + if not transposed: + major = 'src'; minor = 'dst' + else: + major = 'dst'; minor = 'src' + shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) else: shuffled_df = input_df - - shuffled_src = shuffled_df['src'] - shuffled_dst = shuffled_df['dst'] - num_partition_edges = len(shuffled_df) - - shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] - shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + shuffled_major = major_vertices.__cuda_array_interface__['data'][0] + shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] ptr_renum_quad_32_32.reset(call_renumber[int, int](deref(handle_ptr), shuffled_major, @@ -259,6 +267,7 @@ def renumber(input_df, # maybe use cpdef ? renumbered_map['new_ids'] = new_series return renumbered_map, shuffled_df + elif ( edge_t == np.dtype("int64")): if( weight_t == np.dtype("float32")): if(is_multi_gpu): @@ -270,15 +279,19 @@ def renumber(input_df, # maybe use cpdef ? is_hyper_partitioned).release()) shuffled_df = renumber_helper(ptr_shuffled_32_32.get(), vertex_t, weights) + major_vertices = shuffled_df['major_vertices'] + minor_vertices = shuffled_df['minor_vertices'] + num_partition_edges = len(shuffled_df) + if not transposed: + major = 'src'; minor = 'dst' + else: + major = 'dst'; minor = 'src' + shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) else: shuffled_df = input_df - - shuffled_src = shuffled_df['src'] - shuffled_dst = shuffled_df['dst'] - num_partition_edges = len(shuffled_df) - - shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] - shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + shuffled_major = major_vertices.__cuda_array_interface__['data'][0] + shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] ptr_renum_quad_32_64.reset(call_renumber[int, long](deref(handle_ptr), shuffled_major, @@ -330,15 +343,19 @@ def renumber(input_df, # maybe use cpdef ? 
is_hyper_partitioned).release()) shuffled_df = renumber_helper(ptr_shuffled_32_64.get(), vertex_t, weights) + major_vertices = shuffled_df['major_vertices'] + minor_vertices = shuffled_df['minor_vertices'] + num_partition_edges = len(shuffled_df) + if not transposed: + major = 'src'; minor = 'dst' + else: + major = 'dst'; minor = 'src' + shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) else: shuffled_df = input_df - - shuffled_src = shuffled_df['src'] - shuffled_dst = shuffled_df['dst'] - num_partition_edges = len(shuffled_df) - - shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] - shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + shuffled_major = major_vertices.__cuda_array_interface__['data'][0] + shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] ptr_renum_quad_32_64.reset(call_renumber[int, long](deref(handle_ptr), shuffled_major, @@ -379,6 +396,7 @@ def renumber(input_df, # maybe use cpdef ? renumbered_map['new_ids'] = new_series return renumbered_map, shuffled_df + elif (vertex_t == np.dtype("int64")): if ( edge_t == np.dtype("int64")): if( weight_t == np.dtype("float32")): @@ -391,15 +409,19 @@ def renumber(input_df, # maybe use cpdef ? is_hyper_partitioned).release()) shuffled_df = renumber_helper(ptr_shuffled_64_32.get(), vertex_t, weights) + major_vertices = shuffled_df['major_vertices'] + minor_vertices = shuffled_df['minor_vertices'] + num_partition_edges = len(shuffled_df) + if not transposed: + major = 'src'; minor = 'dst' + else: + major = 'dst'; minor = 'src' + shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) else: shuffled_df = input_df - - shuffled_src = shuffled_df['src'] - shuffled_dst = shuffled_df['dst'] - num_partition_edges = len(shuffled_df) - - shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] - shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + shuffled_major = major_vertices.__cuda_array_interface__['data'][0] + shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] ptr_renum_quad_64_64.reset(call_renumber[long, long](deref(handle_ptr), shuffled_major, @@ -428,8 +450,8 @@ def renumber(input_df, # maybe use cpdef ? uniq_partition_vector_64.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), - uniq_partition_vector_32.get()[0].at(1)), + new_series = cudf.Series(np.arange(uniq_partition_vector_64.get()[0].at(0), + uniq_partition_vector_64.get()[0].at(1)), dtype=vertex_t) # create new cudf df @@ -441,6 +463,7 @@ def renumber(input_df, # maybe use cpdef ? renumbered_map['new_ids'] = new_series return renumbered_map, shuffled_df + elif( weight_t == np.dtype("float64")): if(is_multi_gpu): ptr_shuffled_64_64.reset(call_shuffle[long, long, double](deref(handle_ptr), @@ -451,15 +474,19 @@ def renumber(input_df, # maybe use cpdef ? 
is_hyper_partitioned).release()) shuffled_df = renumber_helper(ptr_shuffled_64_64.get(), vertex_t, weights) + major_vertices = shuffled_df['major_vertices'] + minor_vertices = shuffled_df['minor_vertices'] + num_partition_edges = len(shuffled_df) + if not transposed: + major = 'src'; minor = 'dst' + else: + major = 'dst'; minor = 'src' + shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) else: shuffled_df = input_df - - shuffled_src = shuffled_df['src'] - shuffled_dst = shuffled_df['dst'] - num_partition_edges = len(shuffled_df) - - shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] - shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + shuffled_major = major_vertices.__cuda_array_interface__['data'][0] + shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] ptr_renum_quad_64_64.reset(call_renumber[long, long](deref(handle_ptr), shuffled_major, @@ -488,8 +515,8 @@ def renumber(input_df, # maybe use cpdef ? uniq_partition_vector_64.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), - uniq_partition_vector_32.get()[0].at(1)), + new_series = cudf.Series(np.arange(uniq_partition_vector_64.get()[0].at(0), + uniq_partition_vector_64.get()[0].at(1)), dtype=vertex_t) # create new cudf df diff --git a/python/cugraph/tests/test_renumber.py b/python/cugraph/tests/test_renumber.py index 6f88d5f85c4..5362d3f5804 100644 --- a/python/cugraph/tests/test_renumber.py +++ b/python/cugraph/tests/test_renumber.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -44,13 +44,14 @@ def test_renumber_ips(): gdf["source_as_int"] = gdf["source_list"].str.ip2int() gdf["dest_as_int"] = gdf["dest_list"].str.ip2int() - numbering = NumberMap() - numbering.from_series(gdf["source_as_int"], gdf["dest_as_int"]) - src = numbering.to_internal_vertex_id(gdf["source_as_int"]) - dst = numbering.to_internal_vertex_id(gdf["dest_as_int"]) + renumbered_gdf, renumber_map = NumberMap.renumber(gdf, + "source_as_int", + "dest_as_int") - check_src = numbering.from_internal_vertex_id(src)["0"] - check_dst = numbering.from_internal_vertex_id(dst)["0"] + check_src = renumber_map.from_internal_vertex_id(renumbered_gdf['src'] + )["0"] + check_dst = renumber_map.from_internal_vertex_id(renumbered_gdf['dst'] + )["0"] assert check_src.equals(gdf["source_as_int"]) assert check_dst.equals(gdf["dest_as_int"]) @@ -78,13 +79,14 @@ def test_renumber_ips_cols(): gdf["source_as_int"] = gdf["source_list"].str.ip2int() gdf["dest_as_int"] = gdf["dest_list"].str.ip2int() - numbering = NumberMap() - numbering.from_dataframe(gdf, ["source_as_int"], ["dest_as_int"]) - src = numbering.to_internal_vertex_id(gdf["source_as_int"]) - dst = numbering.to_internal_vertex_id(gdf["dest_as_int"]) + renumbered_gdf, renumber_map = NumberMap.renumber(gdf, + ["source_as_int"], + ["dest_as_int"]) - check_src = numbering.from_internal_vertex_id(src)["0"] - check_dst = numbering.from_internal_vertex_id(dst)["0"] + check_src = renumber_map.from_internal_vertex_id(renumbered_gdf['src'] + )["0"] + check_dst = renumber_map.from_internal_vertex_id(renumbered_gdf['dst'] + )["0"] assert check_src.equals(gdf["source_as_int"]) assert check_dst.equals(gdf["dest_as_int"]) @@ -110,13 +112,14 @@ def test_renumber_ips_str_cols(): gdf = cudf.from_pandas(pdf) - numbering = NumberMap() - numbering.from_dataframe(gdf, ["source_list"], ["dest_list"]) - src = numbering.to_internal_vertex_id(gdf["source_list"]) - dst = numbering.to_internal_vertex_id(gdf["dest_list"]) + renumbered_gdf, renumber_map = NumberMap.renumber(gdf, + ["source_as_int"], + ["dest_as_int"]) - check_src = numbering.from_internal_vertex_id(src)["0"] - check_dst = numbering.from_internal_vertex_id(dst)["0"] + check_src = renumber_map.from_internal_vertex_id(renumbered_gdf['src'] + )["0"] + check_dst = renumber_map.from_internal_vertex_id(renumbered_gdf['dst'] + )["0"] assert check_src.equals(gdf["source_list"]) assert check_dst.equals(gdf["dest_list"]) @@ -130,13 +133,14 @@ def test_renumber_negative(): gdf = cudf.DataFrame.from_pandas(df[["source_list", "dest_list"]]) - numbering = NumberMap() - numbering.from_dataframe(gdf, ["source_list"], ["dest_list"]) - src = numbering.to_internal_vertex_id(gdf["source_list"]) - dst = numbering.to_internal_vertex_id(gdf["dest_list"]) + renumbered_gdf, renumber_map = NumberMap.renumber(gdf, + "source_list", + "dest_list") - check_src = numbering.from_internal_vertex_id(src)["0"] - check_dst = numbering.from_internal_vertex_id(dst)["0"] + check_src = renumber_map.from_internal_vertex_id(renumbered_gdf['src'] + )["0"] + check_dst = renumber_map.from_internal_vertex_id(renumbered_gdf['dst'] + )["0"] assert check_src.equals(gdf["source_list"]) assert check_dst.equals(gdf["dest_list"]) @@ -150,19 +154,21 @@ def test_renumber_negative_col(): gdf = cudf.DataFrame.from_pandas(df[["source_list", "dest_list"]]) - numbering = NumberMap() - numbering.from_dataframe(gdf, ["source_list"], ["dest_list"]) - src = numbering.to_internal_vertex_id(gdf["source_list"]) - dst = 
numbering.to_internal_vertex_id(gdf["dest_list"]) + renumbered_gdf, renumber_map = NumberMap.renumber(gdf, + "source_list", + "dest_list") - check_src = numbering.from_internal_vertex_id(src)["0"] - check_dst = numbering.from_internal_vertex_id(dst)["0"] + check_src = renumber_map.from_internal_vertex_id(renumbered_gdf['src'] + )["0"] + check_dst = renumber_map.from_internal_vertex_id(renumbered_gdf['dst'] + )["0"] assert check_src.equals(gdf["source_list"]) assert check_dst.equals(gdf["dest_list"]) # Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.skip(reason="dropped renumbering from series support") @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_series(graph_file): gc.collect() @@ -215,19 +221,21 @@ def test_renumber_files(graph_file): df["dst"] = cudf.Series([x + translate for x in destinations. values_host]) - numbering = NumberMap() - numbering.from_series(df["src"], df["dst"]) + exp_src = cudf.Series([x + translate for x in sources. + values_host]) + exp_dst = cudf.Series([x + translate for x in destinations. + values_host]) - renumbered_df = numbering.add_internal_vertex_id( - numbering.add_internal_vertex_id(df, "src_id", ["src"]), - "dst_id", ["dst"] - ) + renumbered_df, renumber_map = NumberMap.renumber(df, "src", "dst", + preserve_order=True) - check_src = numbering.from_internal_vertex_id(renumbered_df, "src_id") - check_dst = numbering.from_internal_vertex_id(renumbered_df, "dst_id") + unrenumbered_df = renumber_map.unrenumber(renumbered_df, "src", + preserve_order=True) + unrenumbered_df = renumber_map.unrenumber(unrenumbered_df, "dst", + preserve_order=True) - assert check_src["src"].equals(check_src["0"]) - assert check_dst["dst"].equals(check_dst["0"]) + assert exp_src.equals(unrenumbered_df["src"]) + assert exp_dst.equals(unrenumbered_df["dst"]) # Test all combinations of default/managed and pooled/non-pooled allocation @@ -246,19 +254,21 @@ def test_renumber_files_col(graph_file): gdf['dst'] = cudf.Series([x + translate for x in destinations. values_host]) - numbering = NumberMap() - numbering.from_dataframe(gdf, ["src"], ["dst"]) + exp_src = cudf.Series([x + translate for x in sources. + values_host]) + exp_dst = cudf.Series([x + translate for x in destinations. 
+ values_host]) - renumbered_df = numbering.add_internal_vertex_id( - numbering.add_internal_vertex_id(gdf, "src_id", ["src"]), - "dst_id", ["dst"] - ) + renumbered_df, renumber_map = NumberMap.renumber(gdf, ["src"], ["dst"], + preserve_order=True) - check_src = numbering.from_internal_vertex_id(renumbered_df, "src_id") - check_dst = numbering.from_internal_vertex_id(renumbered_df, "dst_id") + unrenumbered_df = renumber_map.unrenumber(renumbered_df, "src", + preserve_order=True) + unrenumbered_df = renumber_map.unrenumber(unrenumbered_df, "dst", + preserve_order=True) - assert check_src["src"].equals(check_src["0"]) - assert check_dst["dst"].equals(check_dst["0"]) + assert exp_src.equals(unrenumbered_df["src"]) + assert exp_dst.equals(unrenumbered_df["dst"]) # Test all combinations of default/managed and pooled/non-pooled allocation @@ -278,21 +288,17 @@ def test_renumber_files_multi_col(graph_file): gdf["src"] = sources + translate gdf["dst"] = destinations + translate - numbering = NumberMap() - numbering.from_dataframe(gdf, ["src", "src_old"], ["dst", "dst_old"]) + renumbered_df, renumber_map = NumberMap.renumber(gdf, + ["src", "src_old"], + ["dst", "dst_old"], + preserve_order=True) - renumbered_df = numbering.add_internal_vertex_id( - numbering.add_internal_vertex_id( - gdf, "src_id", ["src", "src_old"] - ), - "dst_id", - ["dst", "dst_old"], - ) + unrenumbered_df = renumber_map.unrenumber(renumbered_df, "src", + preserve_order=True) + unrenumbered_df = renumber_map.unrenumber(unrenumbered_df, "dst", + preserve_order=True) - check_src = numbering.from_internal_vertex_id(renumbered_df, "src_id") - check_dst = numbering.from_internal_vertex_id(renumbered_df, "dst_id") - - assert check_src["src"].equals(check_src["0"]) - assert check_src["src_old"].equals(check_src["1"]) - assert check_dst["dst"].equals(check_dst["0"]) - assert check_dst["dst_old"].equals(check_dst["1"]) + assert gdf["src"].equals(unrenumbered_df["0_src"]) + assert gdf["src_old"].equals(unrenumbered_df["1_src"]) + assert gdf["dst"].equals(unrenumbered_df["0_dst"]) + assert gdf["dst_old"].equals(unrenumbered_df["1_dst"]) From f2e5a8755e18aadfc151ca65787dd4a3775efb85 Mon Sep 17 00:00:00 2001 From: Andrei Schaffer <37386037+aschaffer@users.noreply.github.com> Date: Tue, 30 Mar 2021 13:09:33 -0500 Subject: [PATCH 35/51] Implement C/CUDA RandomWalks functionality (#1439) This PR tracks work on issue: https://github.com/rapidsai/cugraph/issues/1380. 
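
For readers skimming this patch: the new API returns each batch of walks in a "coalesced" layout, i.e. one 1D vertex buffer (up to num_paths * max_depth entries, truncated to the space actually used), one 1D weight buffer with one fewer entry per path, and a per-path size vector. The host-side sketch below shows how a caller might unpack that triplet; it is illustrative only and not part of the patch. The names h_verts, h_wgts and h_sizes are hypothetical host copies of the three device_uvectors returned by random_walks(), and the sample values are one possible output (max_depth = 3) on the small 6-vertex test graph added in rw_low_level_test.cu.

#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
  using vertex_t = int32_t;
  using weight_t = float;
  using index_t  = int32_t;

  // Hypothetical host copies of the device buffers returned by random_walks().
  // Two paths: path 0 = 0 -> 1 -> 4 (truncated at max_depth = 3),
  //            path 1 = 3 -> 5 (stopped early because vertex 5 is a sink).
  std::vector<vertex_t> h_verts{0, 1, 4, 3, 5};    // coalesced vertices; per-path lengths in h_sizes
  std::vector<weight_t> h_wgts{0.1f, 2.1f, 6.1f};  // one weight per edge, i.e. (size - 1) per path
  std::vector<index_t>  h_sizes{3, 2};             // number of vertices in each path

  std::size_t v_off = 0;  // running offset into the coalesced vertex buffer
  std::size_t w_off = 0;  // running offset into the coalesced weight buffer
  for (std::size_t p = 0; p < h_sizes.size(); ++p) {
    auto sz = static_cast<std::size_t>(h_sizes[p]);
    std::cout << "path " << p << ":";
    for (std::size_t i = 0; i < sz; ++i) {
      std::cout << ' ' << h_verts[v_off + i];
      if (i + 1 < sz) std::cout << " -(" << h_wgts[w_off + i] << ")->";
    }
    std::cout << '\n';
    v_off += sz;
    w_off += sz - 1;
  }
  return 0;
}

Note that the weight buffer holds (size - 1) entries per path, matching the "|edges| = |vertices| - 1 in each path" relationship documented in the new random_walks.cuh.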
Authors: - Andrei Schaffer (@aschaffer) Approvers: - Brad Rees (@BradReesWork) - Chuck Hastings (@ChuckHastings) - Seunghwa Kang (@seunghwak) URL: https://github.com/rapidsai/cugraph/pull/1439 --- cpp/CMakeLists.txt | 1 + cpp/include/algorithms.hpp | 28 + cpp/include/utilities/cython.hpp | 24 + cpp/include/utilities/graph_traits.hpp | 61 ++ cpp/src/experimental/random_walks.cuh | 887 ++++++++++++++++++ cpp/src/sampling/random_walks.cu | 78 ++ cpp/src/utilities/cython.cu | 76 ++ cpp/tests/CMakeLists.txt | 14 + cpp/tests/experimental/random_walks_test.cu | 152 +++ cpp/tests/experimental/random_walks_utils.cuh | 152 +++ cpp/tests/experimental/rw_low_level_test.cu | 783 ++++++++++++++++ 11 files changed, 2256 insertions(+) create mode 100644 cpp/include/utilities/graph_traits.hpp create mode 100644 cpp/src/experimental/random_walks.cuh create mode 100644 cpp/src/sampling/random_walks.cu create mode 100644 cpp/tests/experimental/random_walks_test.cu create mode 100644 cpp/tests/experimental/random_walks_utils.cuh create mode 100644 cpp/tests/experimental/rw_low_level_test.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 34ea935e31d..57f324a60a9 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -420,6 +420,7 @@ add_library(cugraph SHARED src/community/triangles_counting.cu src/community/extract_subgraph_by_vertex.cu src/community/egonet.cu + src/sampling/random_walks.cu src/cores/core_number.cu src/traversal/two_hop_neighbors.cu src/components/connectivity.cu diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index b8706d81e21..0b45b799357 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -1252,5 +1252,33 @@ extract_ego(raft::handle_t const &handle, vertex_t *source_vertex, vertex_t n_subgraphs, vertex_t radius); + +/** + * @brief returns random walks (RW) from starting sources, where each path is of given maximum + * length. Uniform distribution is assumed for the random engine. + * + * @tparam graph_t Type of graph/view (typically, graph_view_t). + * @tparam index_t Type used to store indexing and sizes. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph Graph (view )object to generate RW on. + * @param ptr_d_start Device pointer to set of starting vertex indices for the RW. + * @param num_paths = number(paths). + * @param max_depth maximum length of RWs. + * @return std::tuple, device_vec_t, + * device_vec_t> Triplet of coalesced RW paths, with corresponding edge weights for + * each, and corresponding path sizes. This is meant to minimize the number of DF's to be passed to + * the Python layer. The meaning of "coalesced" here is that a 2D array of paths of different sizes + * is represented as a 1D array. 
+ */ +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector> +random_walks(raft::handle_t const &handle, + graph_t const &graph, + typename graph_t::vertex_type const *ptr_d_start, + index_t num_paths, + index_t max_depth); } // namespace experimental } // namespace cugraph diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index 98e850abbf0..a58331d465a 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -19,6 +19,7 @@ #include #include #include +#include namespace cugraph { namespace cython { @@ -185,6 +186,19 @@ struct major_minor_weights_t { rmm::device_uvector shuffled_weights_; }; +// aggregate for random_walks() return type +// to be exposed to cython: +// +struct random_walk_ret_t { + size_t coalesced_sz_v_; + size_t coalesced_sz_w_; + size_t num_paths_; + size_t max_depth_; + std::unique_ptr d_coalesced_v_; + std::unique_ptr d_coalesced_w_; + std::unique_ptr d_sizes_; +}; + // wrapper for renumber_edgelist() return // (unrenumbering maps, etc.) // @@ -442,6 +456,16 @@ std::unique_ptr call_egonet(raft::handle_t const& handle, vertex_t* source_vertex, vertex_t n_subgraphs, vertex_t radius); +// wrapper for random_walks. +// +template +std::enable_if_t::value, + std::unique_ptr> +call_random_walks(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t const* ptr_start_set, + edge_t num_paths, + edge_t max_depth); // wrapper for shuffling: // diff --git a/cpp/include/utilities/graph_traits.hpp b/cpp/include/utilities/graph_traits.hpp new file mode 100644 index 00000000000..363a13190be --- /dev/null +++ b/cpp/include/utilities/graph_traits.hpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cugraph { +namespace experimental { + +// primary template: +// +template +struct is_one_of; // purposely empty + +// partial specializations: +// +template +struct is_one_of { + static constexpr bool value = std::is_same::value || is_one_of::value; +}; + +template +struct is_one_of { + static constexpr bool value = false; +}; + +// meta-function that constrains +// vertex_t and edge_t template param candidates: +// +template +struct is_vertex_edge_combo { + static constexpr bool value = is_one_of::value && + is_one_of::value && + (sizeof(vertex_t) <= sizeof(edge_t)); +}; + +// meta-function that constrains +// all 3 template param candidates: +// +template +struct is_candidate { + static constexpr bool value = + is_vertex_edge_combo::value && is_one_of::value; +}; + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/random_walks.cuh b/cpp/src/experimental/random_walks.cuh new file mode 100644 index 00000000000..aea8f3d8420 --- /dev/null +++ b/cpp/src/experimental/random_walks.cuh @@ -0,0 +1,887 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Schaffer, aschaffer@nvidia.com +// +#pragma once + +#include + +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +template +using device_vec_t = rmm::device_uvector; + +template +using device_v_it = typename device_vec_t::iterator; + +template +value_t* raw_ptr(device_vec_t& dv) +{ + return dv.data(); +} + +template +value_t const* raw_const_ptr(device_vec_t const& dv) +{ + return dv.data(); +} + +template +struct device_const_vector_view { + device_const_vector_view(value_t const* d_buffer, index_t size) : d_buffer_(d_buffer), size_(size) + { + } + + device_const_vector_view(device_const_vector_view const& other) = delete; + device_const_vector_view& operator=(device_const_vector_view const& other) = delete; + + device_const_vector_view(device_const_vector_view&& other) + { + d_buffer_ = other.d_buffer_; + size_ = other.size_; + } + device_const_vector_view& operator=(device_const_vector_view&& other) + { + d_buffer_ = other.d_buffer_; + size_ = other.size_; + + return *this; + } + + value_t const* begin(void) const { return d_buffer_; } + + value_t const* end() const { return d_buffer_ + size_; } + + index_t size(void) const { return size_; } + + private: + value_t const* d_buffer_{nullptr}; + index_t size_; +}; + +// raft random generator: +// (using upper-bound cached "map" +// giving out_deg(v) for each v in [0, |V|); +// and a pre-generated vector of float random values +// in [0,1] to be brought into [0, d_ub[v])) +// +template +struct rrandom_gen_t { + using seed_type = seed_t; + using real_type = real_t; + + rrandom_gen_t(raft::handle_t const& handle, + index_t num_paths, + device_vec_t& d_random, // scratch-pad, non-coalesced + device_vec_t const& d_crt_out_deg, // non-coalesced + seed_t seed = seed_t{}) + : handle_(handle), + seed_(seed), + num_paths_(num_paths), + d_ptr_out_degs_(raw_const_ptr(d_crt_out_deg)), + d_ptr_random_(raw_ptr(d_random)) + { + auto rnd_sz = d_random.size(); + + CUGRAPH_EXPECTS(rnd_sz >= static_cast(num_paths), + "Un-allocated random buffer."); + + // done in constructor; + // this must be done at each step, + // but this object is constructed at each step; + // + raft::random::Rng rng(seed_); + rng.uniform( + d_ptr_random_, num_paths, real_t{0.0}, real_t{1.0}, handle.get_stream()); + } + + // in place: + // for each v in [0, num_paths) { + // if out_deg(v) > 0 + // d_col_indx[v] = random index in [0, out_deg(v)) + //} + void generate_col_indices(device_vec_t& d_col_indx) const + { + thrust::transform_if( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_ptr_random_, + d_ptr_random_ + num_paths_, // input1 + d_ptr_out_degs_, // input2 + d_ptr_out_degs_, // also stencil + d_col_indx.begin(), + [] __device__(real_t 
rnd_vindx, edge_t crt_out_deg) { + real_t max_ub = static_cast(crt_out_deg - 1); + auto interp_vindx = rnd_vindx * max_ub + real_t{.5}; + vertex_t v_indx = static_cast(interp_vindx); + return (v_indx >= crt_out_deg ? crt_out_deg - 1 : v_indx); + }, + [] __device__(auto crt_out_deg) { return crt_out_deg > 0; }); + } + + private: + raft::handle_t const& handle_; + index_t num_paths_; + edge_t const* d_ptr_out_degs_; // device buffer with out-deg of current set of vertices (most + // recent vertex in each path); size = num_paths_ + real_t* d_ptr_random_; // device buffer with real random values; size = num_paths_ + seed_t seed_; // seed to be used for current batch +}; + +// seeding policy: time (clock) dependent, +// to avoid RW calls repeating same random data: +// +template +struct clock_seeding_t { + clock_seeding_t(void) = default; + + seed_t operator()(void) { return static_cast(std::time(nullptr)); } +}; + +// seeding policy: fixed for debug/testing repro +// +template +struct fixed_seeding_t { + // purposely no default cnstr. + + fixed_seeding_t(seed_t seed) : seed_(seed) {} + seed_t operator()(void) { return seed_; } + + private: + seed_t seed_; +}; + +// classes abstracting the next vertex extraction mechanism: +// +// primary template, purposely undefined +template +struct col_indx_extract_t; + +// specialization for single-gpu functionality: +// +template +struct col_indx_extract_t> { + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + col_indx_extract_t(raft::handle_t const& handle, + device_vec_t const& d_indices, + device_vec_t const& d_offsets, + device_vec_t const& d_values, + device_vec_t const& d_crt_out_degs, + device_vec_t const& d_sizes, + index_t num_paths, + index_t max_depth) + : handle_(handle), + col_indices_(raw_const_ptr(d_indices)), + row_offsets_(raw_const_ptr(d_offsets)), + values_(raw_const_ptr(d_values)), + out_degs_(raw_const_ptr(d_crt_out_degs)), + sizes_(raw_const_ptr(d_sizes)), + num_paths_(num_paths), + max_depth_(max_depth) + { + } + + col_indx_extract_t(raft::handle_t const& handle, + vertex_t const* p_d_indices, + edge_t const* p_d_offsets, + weight_t const* p_d_values, + edge_t const* p_d_crt_out_degs, + index_t const* p_d_sizes, + index_t num_paths, + index_t max_depth) + : handle_(handle), + col_indices_(p_d_indices), + row_offsets_(p_d_offsets), + values_(p_d_values), + out_degs_(p_d_crt_out_degs), + sizes_(p_d_sizes), + num_paths_(num_paths), + max_depth_(max_depth) + { + } + + col_indx_extract_t(raft::handle_t const& handle, + graph_t const& graph, + edge_t const* p_d_crt_out_degs, + index_t const* p_d_sizes, + index_t num_paths, + index_t max_depth) + : handle_(handle), + col_indices_(graph.indices()), + row_offsets_(graph.offsets()), + values_(graph.weights()), + out_degs_(p_d_crt_out_degs), + sizes_(p_d_sizes), + num_paths_(num_paths), + max_depth_(max_depth) + { + } + + // in-place extractor of next set of vertices and weights, + // (d_v_next_vertices, d_v_next_weights), + // given start set of vertices. 
d_v_src_vertices, + // and corresponding column index set, d_v_col_indx: + // + // for each indx in [0, num_paths){ + // v_indx = d_v_src_vertices[indx*max_depth + d_sizes[indx] - 1]; + // if( out_degs_[v_indx] > 0 ) { + // start_row = row_offsets_[v_indx]; + // delta = d_v_col_indx[indx]; + // d_v_next_vertices[indx] = col_indices_[start_row + delta]; + // } + // (use tranform_if() with transform iterator) + // + void operator()( + device_vec_t const& d_coalesced_src_v, // in: coalesced vector of vertices + device_vec_t const& + d_v_col_indx, // in: column indices, given by stepper's random engine + device_vec_t& d_v_next_vertices, // out: set of destination vertices, for next step + device_vec_t& + d_v_next_weights) // out: set of weights between src and destination vertices, for next step + const + { + thrust::transform_if( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_paths_), // input1 + d_v_col_indx.begin(), // input2 + out_degs_, // stencil + thrust::make_zip_iterator( + thrust::make_tuple(d_v_next_vertices.begin(), d_v_next_weights.begin())), // output + [max_depth = max_depth_, + ptr_d_sizes = sizes_, + ptr_d_coalesced_v = raw_const_ptr(d_coalesced_src_v), + row_offsets = row_offsets_, + col_indices = col_indices_, + values = values_] __device__(auto indx, auto col_indx) { + auto delta = ptr_d_sizes[indx] - 1; + auto v_indx = ptr_d_coalesced_v[indx * max_depth + delta]; + auto start_row = row_offsets[v_indx]; + return thrust::make_tuple(col_indices[start_row + col_indx], values[start_row + col_indx]); + }, + [] __device__(auto crt_out_deg) { return crt_out_deg > 0; }); + } + + private: + raft::handle_t const& handle_; + vertex_t const* col_indices_; + edge_t const* row_offsets_; + weight_t const* values_; + + edge_t const* out_degs_; + index_t const* sizes_; + index_t num_paths_; + index_t max_depth_; +}; + +/** + * @brief Class abstracting the RW initialization, stepping, and stopping functionality + * The outline of the algorithm is as follows: + * + * (1) vertex sets are coalesced into d_coalesced_v, + * weight sets are coalesced into d_coalesced_w; + * i.e., the 2 coalesced vectors are allocated to + * num_paths * max_depth, and num_paths * (max_depth -1), respectively + * (since each path has a number of edges equal one + * less than the number of vertices); + * d_coalesced_v is initialized for each i*max_depth entry + * (i=0,,,,num_paths-1) to the corresponding starting vertices; + * (2) d_sizes maintains the current size is for each path; + * Note that a path may end prematurely if it reaches a sink vertex; + * (3) d_crt_out_degs maintains the out-degree of each of the latest + * vertices in the path; i.e., if N(v) := set of destination + * vertices from v, then this vector stores |N(v)| + * for last v in each path; i.e., + * d_crt_out_degs[i] = + * out-degree( d_coalesced_v[i*max_depth + d_sizes[i]-1] ), + * for i in {0,..., num_paths-1}; + * (4) a set of num_paths floating point numbers between [0,1] + * are generated at each step; then they get translated into + * _indices_ k in {0,...d_crt_out_degs[i]-1}; + * (5) the next vertex v is then picked as the k-th out-neighbor: + * next(v) = N(v)[k]; + * (6) d_sizes are incremented accordingly; i.e., for those paths whose + * corresponding last vertex has out-degree > 0; + * (7) then next(v) and corresponding weight of (v, next(v)) are stored + * at appropriate location in their corresponding coalesced vectors; + * (8) the client of 
this class (the random_walks() function) then repeats + * this process max_depth times or until all paths + * have reached sinks; i.e., d_crt_out_degs = {0, 0,...,0}, + * whichever comes first; + * (9) in the end some post-processing is done (stop()) to remove + * unused entries from the 2 coalesced vectors; + * (10) the triplet made of the 2 coalesced vectors and d_sizes is then returned; + * + */ +template , + typename index_t = typename graph_t::edge_type> +struct random_walker_t { + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + using seed_t = typename random_engine_t::seed_type; + using real_t = typename random_engine_t::real_type; + + random_walker_t(raft::handle_t const& handle, + graph_t const& graph, + index_t num_paths, + index_t max_depth) + : handle_(handle), + num_paths_(num_paths), + max_depth_(max_depth), + d_cached_out_degs_(graph.compute_out_degrees(handle_)) + { + } + + // for each i in [0..num_paths_) { + // d_paths_v_set[i*max_depth] = d_src_init_v[i]; + // + void start(device_const_vector_view& d_src_init_v, // in: start set + device_vec_t& d_paths_v_set, // out: coalesced v + device_vec_t& d_sizes) const // out: init sizes to {1,...} + { + // intialize path sizes to 1, as they contain at least one vertex each: + // the initial set: d_src_init_v; + // + thrust::copy_n(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + thrust::make_constant_iterator(1), + num_paths_, + d_sizes.begin()); + + // scatter d_src_init_v to coalesced vertex vector: + // + auto dlambda = [stride = max_depth_] __device__(auto indx) { return indx * stride; }; + + // use the transform iterator as map: + // + auto map_it_begin = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), dlambda); + + thrust::scatter(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_src_init_v.begin(), + d_src_init_v.end(), + map_it_begin, + d_paths_v_set.begin()); + } + + // overload for start() with device_uvector d_v_start + // (handy for testing) + // + void start(device_vec_t const& d_start, // in: start set + device_vec_t& d_paths_v_set, // out: coalesced v + device_vec_t& d_sizes) const // out: init sizes to {1,...} + { + device_const_vector_view d_start_cview{d_start.data(), + static_cast(d_start.size())}; + + start(d_start_cview, d_paths_v_set, d_sizes); + } + + // in-place updates its arguments from one step to next + // (to avoid copying); all "crt" arguments are updated at each step() + // and passed as scratchpad space to avoid copying them + // from one step to another + // + // take one step in sync for all paths that have not reached sinks: + // + void step( + graph_t const& graph, + seed_t seed, + device_vec_t& d_coalesced_v, // crt coalesced vertex set + device_vec_t& d_coalesced_w, // crt coalesced weight set + device_vec_t& d_paths_sz, // crt paths sizes + device_vec_t& d_crt_out_degs, // crt out-degs for current set of vertices + device_vec_t& d_random, // crt set of random real values + device_vec_t& d_col_indx, // crt col col indices to be used for retrieving next step + device_vec_t& d_next_v, // crt set of destination vertices, for next step + device_vec_t& d_next_w) + const // set of weights between src and destination vertices, for next step + { + // update crt snapshot of out-degs, + // from cached out degs, using + // latest vertex in each path as source: + // + gather_from_coalesced( + d_coalesced_v, d_cached_out_degs_, d_paths_sz, 
d_crt_out_degs, max_depth_, num_paths_); + + // generate random destination indices: + // + random_engine_t rgen(handle_, num_paths_, d_random, d_crt_out_degs, seed); + + rgen.generate_col_indices(d_col_indx); + + // dst extraction from dst indices: + // + col_indx_extract_t col_extractor(handle_, + graph, + raw_const_ptr(d_crt_out_degs), + raw_const_ptr(d_paths_sz), + num_paths_, + max_depth_); + + // The following steps update the next entry in each path, + // except the paths that reached sinks; + // + // for each indx in [0..num_paths) { + // v_indx = d_v_rnd_n_indx[indx]; + // + // -- get the `v_indx`-th out-vertex of d_v_paths_v_set[indx] vertex: + // -- also, note the size deltas increased by 1 in dst (d_sizes[]): + // + // d_coalesced_v[indx*num_paths + d_sizes[indx]] = + // get_out_vertex(graph, d_coalesced_v[indx*num_paths + d_sizes[indx] -1)], v_indx); + // d_coalesced_w[indx*(num_paths-1) + d_sizes[indx] - 1] = + // get_out_edge_weight(graph, d_coalesced_v[indx*num_paths + d_sizes[indx]-2], v_indx); + // + // (1) generate actual vertex destinations: + // + col_extractor(d_coalesced_v, d_col_indx, d_next_v, d_next_w); + + // (2) update path sizes: + // + update_path_sizes(d_crt_out_degs, d_paths_sz); + + // (3) actual coalesced updates: + // + scatter_vertices(d_next_v, d_coalesced_v, d_crt_out_degs, d_paths_sz); + scatter_weights(d_next_w, d_coalesced_w, d_crt_out_degs, d_paths_sz); + } + + // returns true if all paths reached sinks: + // + bool all_paths_stopped(device_vec_t const& d_crt_out_degs) const + { + auto how_many_stopped = + thrust::count_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_crt_out_degs.begin(), + d_crt_out_degs.end(), + [] __device__(auto crt_out_deg) { return crt_out_deg == 0; }); + return (static_cast(how_many_stopped) == d_crt_out_degs.size()); + } + + // wrap-up, post-process: + // truncate v_set, w_set to actual space used + // + void stop(device_vec_t& d_coalesced_v, // coalesced vertex set + device_vec_t& d_coalesced_w, // coalesced weight set + device_vec_t const& d_sizes) const // paths sizes + { + assert(max_depth_ > 1); // else, no need to step; and no edges + + index_t const* ptr_d_sizes = d_sizes.data(); + + auto predicate_v = [max_depth = max_depth_, ptr_d_sizes] __device__(auto indx) { + auto row_indx = indx / max_depth; + auto col_indx = indx % max_depth; + + return (col_indx >= ptr_d_sizes[row_indx]); + }; + + auto predicate_w = [max_depth = max_depth_, ptr_d_sizes] __device__(auto indx) { + auto row_indx = indx / (max_depth - 1); + auto col_indx = indx % (max_depth - 1); + + return (col_indx >= ptr_d_sizes[row_indx] - 1); + }; + + auto new_end_v = + thrust::remove_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_coalesced_v.begin(), + d_coalesced_v.end(), + thrust::make_counting_iterator(0), + predicate_v); + + auto new_end_w = + thrust::remove_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_coalesced_w.begin(), + d_coalesced_w.end(), + thrust::make_counting_iterator(0), + predicate_w); + + CUDA_TRY(cudaStreamSynchronize(handle_.get_stream())); + + d_coalesced_v.resize(thrust::distance(d_coalesced_v.begin(), new_end_v), handle_.get_stream()); + d_coalesced_w.resize(thrust::distance(d_coalesced_w.begin(), new_end_w), handle_.get_stream()); + } + + // in-place non-static (needs handle_): + // for indx in [0, nelems): + // gather d_result[indx] = d_src[d_coalesced[indx*stride + d_sizes[indx] -1]] + // + template + void gather_from_coalesced( + device_vec_t const& 
d_coalesced, // |gather map| = stride*nelems + device_vec_t const& d_src, // |gather input| = nelems + device_vec_t const& d_sizes, // |paths sizes| = nelems, elems in [1, stride] + device_vec_t& d_result, // |output| = nelems + index_t stride, // stride = coalesce block size (typically max_depth) + index_t nelems) const // nelems = number of elements to gather (typically num_paths_) + { + vertex_t const* ptr_d_coalesced = raw_const_ptr(d_coalesced); + index_t const* ptr_d_sizes = raw_const_ptr(d_sizes); + + // delta = ptr_d_sizes[indx] - 1 + // + auto dlambda = [stride, ptr_d_sizes, ptr_d_coalesced] __device__(auto indx) { + auto delta = ptr_d_sizes[indx] - 1; + return ptr_d_coalesced[indx * stride + delta]; + }; + + // use the transform iterator as map: + // + auto map_it_begin = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), dlambda); + + thrust::gather(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + map_it_begin, + map_it_begin + nelems, + d_src.begin(), + d_result.begin()); + } + + // in-place non-static (needs handle_); + // pre-condition: path sizes are assumed updated + // to reflect new vertex additions; + // + // for indx in [0, nelems): + // if ( d_crt_out_degs[indx] > 0 ) + // d_coalesced[indx*stride + (d_sizes[indx] - adjust)- 1] = d_src[indx] + // + // adjust := 0 for coalesced vertices; 1 for weights + // (because |edges| = |vertices| - 1, in each path); + // + template + void scatter_to_coalesced( + device_vec_t const& d_src, // |scatter input| = nelems + device_vec_t& d_coalesced, // |scatter input| = stride*nelems + device_vec_t const& d_crt_out_degs, // |current set of vertex out degrees| = nelems, + // to be used as stencil (don't scatter if 0) + device_vec_t const& + d_sizes, // paths sizes used to provide delta in coalesced paths; + // pre-condition: assumed as updated to reflect new vertex additions; + // also, this is the number of _vertices_ in each path; + // hence for scattering weights this needs to be adjusted; hence the `adjust` parameter + index_t + stride, // stride = coalesce block size (max_depth for vertices; max_depth-1 for weights) + index_t nelems, // nelems = number of elements to gather (typically num_paths_) + index_t adjust = 0) + const // adjusting parameter for scattering vertices (0) or weights (1); see above for more; + { + index_t const* ptr_d_sizes = raw_const_ptr(d_sizes); + + auto dlambda = [stride, adjust, ptr_d_sizes] __device__(auto indx) { + auto delta = ptr_d_sizes[indx] - adjust - 1; + return indx * stride + delta; + }; + + // use the transform iterator as map: + // + auto map_it_begin = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), dlambda); + + thrust::scatter_if(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_src.begin(), + d_src.end(), + map_it_begin, + d_crt_out_degs.begin(), + d_coalesced.begin(), + [] __device__(auto crt_out_deg) { + return crt_out_deg > 0; // predicate + }); + } + + // updates the entries in the corresponding coalesced vector, + // for which out_deg > 0 + // + void scatter_vertices(device_vec_t const& d_src, + device_vec_t& d_coalesced, + device_vec_t const& d_crt_out_degs, + device_vec_t const& d_sizes) const + { + scatter_to_coalesced(d_src, d_coalesced, d_crt_out_degs, d_sizes, max_depth_, num_paths_); + } + // + void scatter_weights(device_vec_t const& d_src, + device_vec_t& d_coalesced, + device_vec_t const& d_crt_out_degs, + device_vec_t const& d_sizes) const + { + scatter_to_coalesced( + d_src, d_coalesced, 
d_crt_out_degs, d_sizes, max_depth_ - 1, num_paths_, 1); + } + + // in-place update (increment) path sizes for paths + // that have not reached a sink; i.e., for which + // d_crt_out_degs[indx]>0: + // + void update_path_sizes(device_vec_t const& d_crt_out_degs, + device_vec_t& d_sizes) const + { + thrust::transform_if( + rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), + d_sizes.begin(), + d_sizes.end(), // input + d_crt_out_degs.begin(), // stencil + d_sizes.begin(), // output: in-place + [] __device__(auto crt_sz) { return crt_sz + 1; }, + [] __device__(auto crt_out_deg) { return crt_out_deg > 0; }); + } + + device_vec_t const& get_out_degs(void) const { return d_cached_out_degs_; } + + private: + raft::handle_t const& handle_; + index_t num_paths_; + index_t max_depth_; + device_vec_t d_cached_out_degs_; +}; + +/** + * @brief returns random walks (RW) from starting sources, where each path is of given maximum + * length. Single-GPU specialization. + * + * @tparam graph_t Type of graph (view). + * @tparam random_engine_t Type of random engine used to generate RW. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph Graph object to generate RW on. + * @param d_v_start Device (view) set of starting vertex indices for the RW. + * number(paths) == d_v_start.size(). + * @param max_depth maximum length of RWs. + * @return std::tuple, device_vec_t, + * device_vec_t, seed> Quadruplet of coalesced RW paths, with corresponding edge weights + * for each, and corresponding path sizes. This is meant to minimize the number of DF's to be passed + * to the Python layer. Also returning seed for testing / debugging repro. The meaning of + * "coalesced" here is that a 2D array of paths of different sizes is represented as a 1D array. 
+ */ +template , + typename seeding_policy_t = clock_seeding_t, + typename index_t = typename graph_t::edge_type> +std::enable_if_t, + device_vec_t, + device_vec_t, + typename random_engine_t::seed_type>> +random_walks_impl(raft::handle_t const& handle, + graph_t const& graph, + device_const_vector_view& d_v_start, + index_t max_depth, + seeding_policy_t seeder = clock_seeding_t{}) +{ + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + using seed_t = typename random_engine_t::seed_type; + using real_t = typename random_engine_t::real_type; + + vertex_t num_vertices = graph.get_number_of_vertices(); + + auto how_many_valid = + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_v_start.begin(), + d_v_start.end(), + [num_vertices] __device__(auto crt_vertex) { + return (crt_vertex >= 0) && (crt_vertex < num_vertices); + }); + + CUGRAPH_EXPECTS(static_cast(how_many_valid) == d_v_start.size(), + "Invalid set of starting vertices."); + + auto num_paths = d_v_start.size(); + auto stream = handle.get_stream(); + + random_walker_t rand_walker{ + handle, graph, static_cast(num_paths), static_cast(max_depth)}; + + // pre-allocate num_paths * max_depth; + // + auto coalesced_sz = num_paths * max_depth; + device_vec_t d_coalesced_v(coalesced_sz, stream); // coalesced vertex set + device_vec_t d_coalesced_w(coalesced_sz, stream); // coalesced weight set + device_vec_t d_paths_sz(num_paths, stream); // paths sizes + device_vec_t d_crt_out_degs(num_paths, stream); // out-degs for current set of vertices + device_vec_t d_random(num_paths, stream); + device_vec_t d_col_indx(num_paths, stream); + device_vec_t d_next_v(num_paths, stream); + device_vec_t d_next_w(num_paths, stream); + + // abstracted out seed initialization: + // + seed_t seed0 = static_cast(seeder()); + + // very first vertex, for each path: + // + rand_walker.start(d_v_start, d_coalesced_v, d_paths_sz); + + // start from 1, as 0-th was initialized above: + // + for (decltype(max_depth) step_indx = 1; step_indx < max_depth; ++step_indx) { + // take one-step in-sync for each path in parallel: + // + rand_walker.step(graph, + seed0 + static_cast(step_indx), + d_coalesced_v, + d_coalesced_w, + d_paths_sz, + d_crt_out_degs, + d_random, + d_col_indx, + d_next_v, + d_next_w); + + // early exit: all paths have reached sinks: + // + if (rand_walker.all_paths_stopped(d_crt_out_degs)) break; + } + + // wrap-up, post-process: + // truncate v_set, w_set to actual space used + // + rand_walker.stop(d_coalesced_v, d_coalesced_w, d_paths_sz); + + // because device_uvector is not copy-cnstr-able: + // + return std::make_tuple(std::move(d_coalesced_v), + std::move(d_coalesced_w), + std::move(d_paths_sz), + seed0); // also return seed for repro +} + +/** + * @brief returns random walks (RW) from starting sources, where each path is of given maximum + * length. Multi-GPU specialization. + * + * @tparam graph_t Type of graph (view). + * @tparam random_engine_t Type of random engine used to generate RW. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph Graph object to generate RW on. + * @param d_v_start Device (view) set of starting vertex indices for the RW. number(RW) == + * d_v_start.size(). + * @param max_depth maximum length of RWs. 
+ * @return std::tuple, device_vec_t, + * device_vec_t, seed> Quadruplet of coalesced RW paths, with corresponding edge weights + * for each, and coresponding path sizes. This is meant to minimize the number of DF's to be passed + * to the Python layer. Also returning seed for testing / debugging repro. The meaning of + * "coalesced" here is that a 2D array of paths of different sizes is represented as a 1D array. + */ +template , + typename seeding_policy_t = clock_seeding_t, + typename index_t = typename graph_t::edge_type> +std::enable_if_t, + device_vec_t, + device_vec_t, + typename random_engine_t::seed_type>> +random_walks_impl(raft::handle_t const& handle, + graph_t const& graph, + device_const_vector_view& d_v_start, + index_t max_depth, + seeding_policy_t seeder = clock_seeding_t{}) +{ + CUGRAPH_FAIL("Not implemented yet."); +} + +} // namespace detail + +/** + * @brief returns random walks (RW) from starting sources, where each path is of given maximum + * length. Uniform distribution is assumed for the random engine. + * + * @tparam graph_t Type of graph (view). + * @tparam index_t Type used to store indexing and sizes. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph Graph object to generate RW on. + * @param ptr_d_start Device pointer to set of starting vertex indices for the RW. + * @param num_paths = number(paths). + * @param max_depth maximum length of RWs. + * @return std::tuple, device_vec_t, + * device_vec_t> Triplet of coalesced RW paths, with corresponding edge weights for + * each, and coresponding path sizes. This is meant to minimize the number of DF's to be passed to + * the Python layer. + */ +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector> +random_walks(raft::handle_t const& handle, + graph_t const& graph, + typename graph_t::vertex_type const* ptr_d_start, + index_t num_paths, + index_t max_depth) +{ + using vertex_t = typename graph_t::vertex_type; + + // 0-copy const device view: + // + detail::device_const_vector_view d_v_start{ptr_d_start, num_paths}; + + auto quad_tuple = detail::random_walks_impl(handle, graph, d_v_start, max_depth); + // ignore last element of the quad, seed, + // since it's meant for testing / debugging, only: + // + return std::make_tuple(std::move(std::get<0>(quad_tuple)), + std::move(std::get<1>(quad_tuple)), + std::move(std::get<2>(quad_tuple))); +} +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/sampling/random_walks.cu b/cpp/src/sampling/random_walks.cu new file mode 100644 index 00000000000..88d5d9ed5c8 --- /dev/null +++ b/cpp/src/sampling/random_walks.cu @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Andrei Schaffer, aschaffer@nvidia.com +// +#include +#include + +namespace cugraph { +namespace experimental { +// template explicit instantiation directives (EIDir's): +// +// SG FP32{ +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int32_t const* ptr_d_start, + int32_t num_paths, + int32_t max_depth); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int32_t const* ptr_d_start, + int64_t num_paths, + int64_t max_depth); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int64_t const* ptr_d_start, + int64_t num_paths, + int64_t max_depth); +//} +// +// SG FP64{ +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int32_t const* ptr_d_start, + int32_t num_paths, + int32_t max_depth); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int32_t const* ptr_d_start, + int64_t num_paths, + int64_t max_depth); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + random_walks(raft::handle_t const& handle, + graph_view_t const& gview, + int64_t const* ptr_d_start, + int64_t num_paths, + int64_t max_depth); +//} +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 5382b4856f3..a9e3146bbcd 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -696,6 +696,61 @@ std::unique_ptr call_egonet(raft::handle_t const& handle, } } +// Wrapper for random_walks() through a graph container +// to expose the API to cython. 
+// +template +std::enable_if_t::value, + std::unique_ptr> +call_random_walks(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t const* ptr_start_set, + edge_t num_paths, + edge_t max_depth) +{ + if (graph_container.weightType == numberTypeEnum::floatType) { + using weight_t = float; + + auto graph = + detail::create_graph(handle, graph_container); + + auto triplet = cugraph::experimental::random_walks( + handle, graph->view(), ptr_start_set, num_paths, max_depth); + + random_walk_ret_t rw_tri{std::get<0>(triplet).size(), + std::get<1>(triplet).size(), + static_cast(num_paths), + static_cast(max_depth), + std::make_unique(std::get<0>(triplet).release()), + std::make_unique(std::get<1>(triplet).release()), + std::make_unique(std::get<2>(triplet).release())}; + + return std::make_unique(std::move(rw_tri)); + + } else if (graph_container.weightType == numberTypeEnum::doubleType) { + using weight_t = double; + + auto graph = + detail::create_graph(handle, graph_container); + + auto triplet = cugraph::experimental::random_walks( + handle, graph->view(), ptr_start_set, num_paths, max_depth); + + random_walk_ret_t rw_tri{std::get<0>(triplet).size(), + std::get<1>(triplet).size(), + static_cast(num_paths), + static_cast(max_depth), + std::make_unique(std::get<0>(triplet).release()), + std::make_unique(std::get<1>(triplet).release()), + std::make_unique(std::get<2>(triplet).release())}; + + return std::make_unique(std::move(rw_tri)); + + } else { + CUGRAPH_FAIL("Unsupported weight type."); + } +} + // Wrapper for calling SSSP through a graph container template void call_sssp(raft::handle_t const& handle, @@ -1038,6 +1093,27 @@ template std::unique_ptr call_egonet( int64_t n_subgraphs, int64_t radius); +template std::unique_ptr call_random_walks( + raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t const* ptr_start_set, + int32_t num_paths, + int32_t max_depth); + +template std::unique_ptr call_random_walks( + raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t const* ptr_start_set, + int64_t num_paths, + int64_t max_depth); + +template std::unique_ptr call_random_walks( + raft::handle_t const& handle, + graph_container_t const& graph_container, + int64_t const* ptr_start_set, + int64_t num_paths, + int64_t max_depth); + template void call_sssp(raft::handle_t const& handle, graph_container_t const& graph_container, int32_t* identifiers, diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 5292f9f9997..3b65b0edb29 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -415,6 +415,20 @@ set(EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS}") +################################################################################################### +# - Experimental RANDOM_WALKS tests ------------------------------------------------------------ + +set(EXPERIMENTAL_RANDOM_WALKS_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/random_walks_test.cu") + +ConfigureTest(EXPERIMENTAL_RANDOM_WALKS_TEST "${EXPERIMENTAL_RANDOM_WALKS_TEST_SRCS}") + +################################################################################################### +set(EXPERIMENTAL_RANDOM_WALKS_LOW_LEVEL_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/rw_low_level_test.cu") + +ConfigureTest(EXPERIMENTAL_RANDOM_WALKS_LOW_LEVEL_TEST "${EXPERIMENTAL_RANDOM_WALKS_LOW_LEVEL_SRCS}") + 
################################################################################################### # - MG tests -------------------------------------------------------------------------------------- diff --git a/cpp/tests/experimental/random_walks_test.cu b/cpp/tests/experimental/random_walks_test.cu new file mode 100644 index 00000000000..9fb1716f62b --- /dev/null +++ b/cpp/tests/experimental/random_walks_test.cu @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "cuda_profiler_api.h" +#include "gtest/gtest.h" + +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include + +#include "random_walks_utils.cuh" + +#include +#include +#include +#include +#include +#include + +namespace { // anonym. +template +void fill_start(raft::handle_t const& handle, + rmm::device_uvector& d_start, + index_t num_vertices) +{ + index_t num_paths = d_start.size(); + + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_paths), + + d_start.begin(), + [num_vertices] __device__(auto indx) { return indx % num_vertices; }); +} +} // namespace + +struct RandomWalks_Usecase { + std::string graph_file_full_path{}; + bool test_weighted{false}; + + RandomWalks_Usecase(std::string const& graph_file_path, bool test_weighted) + : test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +}; + +class Tests_RandomWalks : public ::testing::TestWithParam { + public: + Tests_RandomWalks() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(RandomWalks_Usecase const& configuration) + { + raft::handle_t handle{}; + + // debuf info: + // + // std::cout << "read graph file: " << configuration.graph_file_full_path << std::endl; + + cugraph::experimental::graph_t graph(handle); + std::tie(graph, std::ignore) = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted, false); + + auto graph_view = graph.view(); + + // call random_walks: + start_random_walks(graph_view); + } + + template + void start_random_walks(graph_vt const& graph_view) + { + using vertex_t = typename graph_vt::vertex_type; + using edge_t = typename graph_vt::edge_type; + using weight_t = typename graph_vt::weight_type; + + raft::handle_t handle{}; + edge_t num_paths = 10; + rmm::device_uvector d_start(num_paths, handle.get_stream()); + + vertex_t num_vertices = graph_view.get_number_of_vertices(); + fill_start(handle, d_start, num_vertices); + + // 0-copy const device view: + // + cugraph::experimental::detail::device_const_vector_view d_start_view{ + 
d_start.data(), num_paths}; + + edge_t max_depth{10}; + + auto ret_tuple = + cugraph::experimental::detail::random_walks_impl(handle, graph_view, d_start_view, max_depth); + + // check results: + // + bool test_all_paths = cugraph::test::host_check_rw_paths( + handle, graph_view, std::get<0>(ret_tuple), std::get<1>(ret_tuple), std::get<2>(ret_tuple)); + + if (!test_all_paths) + std::cout << "starting seed on failure: " << std::get<3>(ret_tuple) << '\n'; + + ASSERT_TRUE(test_all_paths); + } +}; + +TEST_P(Tests_RandomWalks, Initialize_i32_i32_f) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_RandomWalks, + ::testing::Values(RandomWalks_Usecase("test/datasets/karate.mtx", true), + RandomWalks_Usecase("test/datasets/web-Google.mtx", true), + RandomWalks_Usecase("test/datasets/ljournal-2008.mtx", true), + RandomWalks_Usecase("test/datasets/webbase-1M.mtx", true))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/random_walks_utils.cuh b/cpp/tests/experimental/random_walks_utils.cuh new file mode 100644 index 00000000000..863094dc310 --- /dev/null +++ b/cpp/tests/experimental/random_walks_utils.cuh @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +// utilities for testing / verification of Random Walks functionality: +// +namespace cugraph { +namespace test { + +template +using vector_test_t = cugraph::experimental::detail::device_vec_t; // for debug purposes + +// host side utility to check a if a sequence of vertices is connected: +// +template +bool host_check_path(std::vector const& row_offsets, + std::vector const& col_inds, + std::vector const& values, + typename std::vector::const_iterator v_path_begin, + typename std::vector::const_iterator v_path_end, + typename std::vector::const_iterator w_path_begin) +{ + bool assert1 = (row_offsets.size() > 0); + bool assert2 = (col_inds.size() == values.size()); + + vertex_t num_rows = row_offsets.size() - 1; + edge_t nnz = row_offsets.back(); + + bool assert3 = (nnz == static_cast(col_inds.size())); + if (assert1 == false || assert2 == false || assert3 == false) { + std::cout << "CSR inconsistency\n"; + return false; + } + + auto it_w = w_path_begin; + for (auto it_v = v_path_begin; it_v != v_path_end - 1; ++it_v, ++it_w) { + auto crt_vertex = *it_v; + auto next_vertex = *(it_v + 1); + + auto begin = col_inds.begin() + row_offsets[crt_vertex]; + auto end = col_inds.begin() + row_offsets[crt_vertex + 1]; + auto found_next = std::find_if( + begin, end, [next_vertex](auto dst_vertex) { return dst_vertex == next_vertex; }); + if (found_next == end) { + std::cout << "vertex not found: " << next_vertex << " as neighbor of " << crt_vertex << '\n'; + return false; + } + + auto delta = row_offsets[crt_vertex] + std::distance(begin, found_next); + + // std::cout << "delta in ci: " << delta << '\n'; + auto found_edge = values.begin() + delta; + if (*found_edge != *it_w) { + std::cout << "weight not found: " << *found_edge << " between " << crt_vertex << " and " + << next_vertex << '\n'; + return false; + } + } + return true; +} + +template +bool host_check_rw_paths( + raft::handle_t const& handle, + cugraph::experimental::graph_view_t const& graph_view, + vector_test_t const& d_coalesced_v, + vector_test_t const& d_coalesced_w, + vector_test_t const& d_sizes) +{ + edge_t num_edges = graph_view.get_number_of_edges(); + vertex_t num_vertices = graph_view.get_number_of_vertices(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + std::vector v_ro(num_vertices + 1); + std::vector v_ci(num_edges); + std::vector v_vals(num_edges); + + raft::update_host(v_ro.data(), offsets, v_ro.size(), handle.get_stream()); + raft::update_host(v_ci.data(), indices, v_ci.size(), handle.get_stream()); + raft::update_host(v_vals.data(), values, v_vals.size(), handle.get_stream()); + + std::vector v_coalesced(d_coalesced_v.size()); + std::vector w_coalesced(d_coalesced_w.size()); + std::vector v_sizes(d_sizes.size()); + + raft::update_host(v_coalesced.data(), + cugraph::experimental::detail::raw_const_ptr(d_coalesced_v), + d_coalesced_v.size(), + handle.get_stream()); + raft::update_host(w_coalesced.data(), + cugraph::experimental::detail::raw_const_ptr(d_coalesced_w), + d_coalesced_w.size(), + handle.get_stream()); + raft::update_host(v_sizes.data(), + cugraph::experimental::detail::raw_const_ptr(d_sizes), + d_sizes.size(), + handle.get_stream()); + + auto it_v_begin = v_coalesced.begin(); + auto it_w_begin = w_coalesced.begin(); + for (auto&& crt_sz : v_sizes) { + auto it_v_end = it_v_begin + 
crt_sz; + + bool test_path = host_check_path(v_ro, v_ci, v_vals, it_v_begin, it_v_end, it_w_begin); + + it_v_begin = it_v_end; + it_w_begin += crt_sz - 1; + + if (!test_path) { // something went wrong; print to debug (since it's random) + raft::print_host_vector("sizes", v_sizes.data(), v_sizes.size(), std::cout); + + raft::print_host_vector("coalesced v", v_coalesced.data(), v_coalesced.size(), std::cout); + + raft::print_host_vector("coalesced w", w_coalesced.data(), w_coalesced.size(), std::cout); + + return false; + } + } + return true; +} + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/experimental/rw_low_level_test.cu b/cpp/tests/experimental/rw_low_level_test.cu new file mode 100644 index 00000000000..a32e258d366 --- /dev/null +++ b/cpp/tests/experimental/rw_low_level_test.cu @@ -0,0 +1,783 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "cuda_profiler_api.h" + +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include + +#include "random_walks_utils.cuh" + +#include +#include +#include +#include +#include +#include +#include + +using namespace cugraph::experimental; + +template +using vector_test_t = detail::device_vec_t; // for debug purposes + +namespace { // anonym. 
+ +template +graph_t make_graph(raft::handle_t const& handle, + std::vector const& v_src, + std::vector const& v_dst, + std::vector const& v_w, + vertex_t num_vertices, + edge_t num_edges) +{ + vector_test_t d_src(num_edges, handle.get_stream()); + vector_test_t d_dst(num_edges, handle.get_stream()); + vector_test_t d_weights(num_edges, handle.get_stream()); + + raft::update_device(d_src.data(), v_src.data(), d_src.size(), handle.get_stream()); + raft::update_device(d_dst.data(), v_dst.data(), d_dst.size(), handle.get_stream()); + raft::update_device(d_weights.data(), v_w.data(), d_weights.size(), handle.get_stream()); + + edgelist_t edgelist{ + d_src.data(), d_dst.data(), d_weights.data(), num_edges}; + + graph_t graph( + handle, edgelist, num_vertices, graph_properties_t{}, false); + + return graph; +} + +template +bool check_col_indices(raft::handle_t const& handle, + vector_test_t const& d_crt_out_degs, + vector_test_t const& d_col_indx, + index_t num_paths) +{ + bool all_indices_within_degs = thrust::all_of( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_paths), + [p_d_col_indx = detail::raw_const_ptr(d_col_indx), + p_d_crt_out_degs = detail::raw_const_ptr(d_crt_out_degs)] __device__(auto indx) { + if (p_d_crt_out_degs[indx] > 0) + return ((p_d_col_indx[indx] >= 0) && (p_d_col_indx[indx] < p_d_crt_out_degs[indx])); + else + return true; + }); + return all_indices_within_degs; +} + +} // namespace + +// FIXME (per rlratzel request): +// This test may be considered an e2e test +// which could be moved to a different test suite: +// +struct RandomWalksPrimsTest : public ::testing::Test { +}; + +TEST_F(RandomWalksPrimsTest, SimpleGraphRWStart) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + std::vector v_ro(num_vertices + 1); + std::vector v_ci(num_edges); + std::vector v_vs(num_edges); + + raft::update_host(v_ro.data(), offsets, num_vertices + 1, handle.get_stream()); + raft::update_host(v_ci.data(), indices, num_edges, handle.get_stream()); + raft::update_host(v_vs.data(), values, num_edges, handle.get_stream()); + + std::vector v_ro_expected{0, 1, 3, 6, 7, 8, 8}; + std::vector v_ci_expected{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_vs_expected{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + EXPECT_EQ(v_ro, v_ro_expected); + EXPECT_EQ(v_ci, v_ci_expected); + EXPECT_EQ(v_vs, v_vs_expected); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), 
d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + std::vector v_coalesced_exp{1, -1, -1, 0, -1, -1, 4, -1, -1, 2, -1, -1}; + raft::update_host( + v_coalesced.data(), raw_const_ptr(d_coalesced_v), total_sz, handle.get_stream()); + EXPECT_EQ(v_coalesced, v_coalesced_exp); + + std::vector v_sizes{1, 1, 1, 1}; + std::vector v_sz_exp(num_paths); + raft::update_host(v_sz_exp.data(), raw_const_ptr(d_sizes), num_paths, handle.get_stream()); + + EXPECT_EQ(v_sizes, v_sz_exp); +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphCoalesceExperiments) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + auto const& d_out_degs = rand_walker.get_out_degs(); + EXPECT_EQ(static_cast(num_vertices), d_out_degs.size()); + + std::vector v_out_degs(num_vertices); + raft::update_host( + v_out_degs.data(), raw_const_ptr(d_out_degs), num_vertices, handle.get_stream()); + + std::vector v_out_degs_exp{1, 2, 3, 1, 1, 0}; + EXPECT_EQ(v_out_degs, v_out_degs_exp); + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + // update crt_out_degs: + // + vector_test_t d_crt_out_degs(num_paths, handle.get_stream()); + rand_walker.gather_from_coalesced( + d_coalesced_v, d_out_degs, d_sizes, d_crt_out_degs, max_depth, num_paths); + + std::vector v_crt_out_degs(num_paths); + raft::update_host( + v_crt_out_degs.data(), raw_const_ptr(d_crt_out_degs), num_paths, handle.get_stream()); + + std::vector v_crt_out_degs_exp{2, 1, 1, 3}; + EXPECT_EQ(v_crt_out_degs, v_crt_out_degs_exp); +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphColExtraction) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + 
raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + auto const& d_out_degs = rand_walker.get_out_degs(); + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + // update crt_out_degs: + // + vector_test_t d_crt_out_degs(num_paths, handle.get_stream()); + rand_walker.gather_from_coalesced( + d_coalesced_v, d_out_degs, d_sizes, d_crt_out_degs, max_depth, num_paths); + + col_indx_extract_t col_extractor{handle, + graph_view, + raw_const_ptr(d_crt_out_degs), + raw_const_ptr(d_sizes), + num_paths, + max_depth}; + + // typically given by random engine: + // + std::vector v_col_indx{1, 0, 0, 2}; + vector_test_t d_col_indx(num_paths, handle.get_stream()); + + raft::update_device(d_col_indx.data(), v_col_indx.data(), d_col_indx.size(), handle.get_stream()); + + vector_test_t d_next_v(num_paths, handle.get_stream()); + vector_test_t d_next_w(num_paths, handle.get_stream()); + + col_extractor(d_coalesced_v, d_col_indx, d_next_v, d_next_w); + + std::vector v_next_v(num_paths); + std::vector v_next_w(num_paths); + + raft::update_host(v_next_v.data(), raw_const_ptr(d_next_v), num_paths, handle.get_stream()); + raft::update_host(v_next_w.data(), raw_const_ptr(d_next_w), num_paths, handle.get_stream()); + + std::vector v_next_v_exp{4, 1, 5, 3}; + std::vector v_next_w_exp{2.1f, 0.1f, 7.1f, 5.1f}; + + EXPECT_EQ(v_next_v, v_next_v_exp); + EXPECT_EQ(v_next_w, v_next_w_exp); +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphRndGenColIndx) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + using real_t = float; + using seed_t = long; + + using random_engine_t = rrandom_gen_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + 
index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + auto const& d_out_degs = rand_walker.get_out_degs(); + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + // update crt_out_degs: + // + vector_test_t d_crt_out_degs(num_paths, handle.get_stream()); + rand_walker.gather_from_coalesced( + d_coalesced_v, d_out_degs, d_sizes, d_crt_out_degs, max_depth, num_paths); + + // random engine generated: + // + vector_test_t d_col_indx(num_paths, handle.get_stream()); + vector_test_t d_random(num_paths, handle.get_stream()); + + seed_t seed = static_cast(std::time(nullptr)); + random_engine_t rgen(handle, num_paths, d_random, d_crt_out_degs, seed); + rgen.generate_col_indices(d_col_indx); + + bool all_indices_within_degs = check_col_indices(handle, d_crt_out_degs, d_col_indx, num_paths); + + ASSERT_TRUE(all_indices_within_degs); +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphUpdatePathSizes) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + using real_t = float; + using seed_t = long; + + using random_engine_t = rrandom_gen_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + auto const& d_out_degs = rand_walker.get_out_degs(); + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + // Fixed set of out-degs, as opposed to have them generated by the algorithm. 
+ // That's because I want to test a certain functionality in isolation + // + std::vector v_crt_out_degs{2, 0, 1, 0}; + vector_test_t d_crt_out_degs(num_paths, handle.get_stream()); + raft::update_device( + d_crt_out_degs.data(), v_crt_out_degs.data(), d_crt_out_degs.size(), handle.get_stream()); + + rand_walker.update_path_sizes(d_crt_out_degs, d_sizes); + + std::vector v_sizes(num_paths); + raft::update_host(v_sizes.data(), raw_const_ptr(d_sizes), num_paths, handle.get_stream()); + std::vector v_sizes_exp{2, 1, 2, 1}; + // i.e., corresponding 0-entries in crt-out-degs, don't get updated; + + EXPECT_EQ(v_sizes, v_sizes_exp); +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphScatterUpdate) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_coalesced(total_sz, -1); + std::vector w_coalesced(total_sz - num_paths, -1); + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_start(num_paths, handle.get_stream()); + + raft::update_device(d_start.data(), v_start.data(), d_start.size(), handle.get_stream()); + + vector_test_t d_sizes(num_paths, handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + auto const& d_out_degs = rand_walker.get_out_degs(); + + rand_walker.start(d_start, d_coalesced_v, d_sizes); + + // update crt_out_degs: + // + vector_test_t d_crt_out_degs(num_paths, handle.get_stream()); + rand_walker.gather_from_coalesced( + d_coalesced_v, d_out_degs, d_sizes, d_crt_out_degs, max_depth, num_paths); + + col_indx_extract_t col_extractor{handle, + graph_view, + raw_const_ptr(d_crt_out_degs), + raw_const_ptr(d_sizes), + num_paths, + max_depth}; + + // typically given by random engine: + // + std::vector v_col_indx{1, 0, 0, 2}; + vector_test_t d_col_indx(num_paths, handle.get_stream()); + + raft::update_device(d_col_indx.data(), v_col_indx.data(), d_col_indx.size(), handle.get_stream()); + + vector_test_t d_next_v(num_paths, handle.get_stream()); + vector_test_t d_next_w(num_paths, handle.get_stream()); + + col_extractor(d_coalesced_v, d_col_indx, d_next_v, d_next_w); + + rand_walker.update_path_sizes(d_crt_out_degs, d_sizes); + + // check start(): + // + { + std::vector v_coalesced_exp{1, -1, -1, 0, -1, -1, 4, -1, -1, 2, -1, -1}; + raft::update_host( + v_coalesced.data(), raw_const_ptr(d_coalesced_v), total_sz, handle.get_stream()); + EXPECT_EQ(v_coalesced, v_coalesced_exp); + } + + // check crt_out_degs: + // + { + std::vector v_crt_out_degs(num_paths); + raft::update_host( + 
v_crt_out_degs.data(), raw_const_ptr(d_crt_out_degs), num_paths, handle.get_stream()); + std::vector v_crt_out_degs_exp{2, 1, 1, 3}; + EXPECT_EQ(v_crt_out_degs, v_crt_out_degs_exp); + } + + // check paths sizes update: + // + { + std::vector v_sizes(num_paths); + raft::update_host(v_sizes.data(), raw_const_ptr(d_sizes), num_paths, handle.get_stream()); + std::vector v_sizes_exp{2, 2, 2, 2}; + // i.e., corresponding 0-entries in crt-out-degs, don't get updated; + EXPECT_EQ(v_sizes, v_sizes_exp); + } + + // check next step: + // + { + std::vector v_next_v(num_paths); + std::vector v_next_w(num_paths); + + raft::update_host(v_next_v.data(), raw_const_ptr(d_next_v), num_paths, handle.get_stream()); + raft::update_host(v_next_w.data(), raw_const_ptr(d_next_w), num_paths, handle.get_stream()); + + std::vector v_next_v_exp{4, 1, 5, 3}; + std::vector v_next_w_exp{2.1f, 0.1f, 7.1f, 5.1f}; + + EXPECT_EQ(v_next_v, v_next_v_exp); + EXPECT_EQ(v_next_w, v_next_w_exp); + } + + rand_walker.scatter_vertices(d_next_v, d_coalesced_v, d_crt_out_degs, d_sizes); + rand_walker.scatter_weights(d_next_w, d_coalesced_w, d_crt_out_degs, d_sizes); + + // check vertex/weight scatter: + // + { + raft::update_host( + v_coalesced.data(), raw_const_ptr(d_coalesced_v), total_sz, handle.get_stream()); + raft::update_host( + w_coalesced.data(), raw_const_ptr(d_coalesced_w), total_sz - num_paths, handle.get_stream()); + + std::vector v_coalesced_exp{1, 4, -1, 0, 1, -1, 4, 5, -1, 2, 3, -1}; + std::vector w_coalesced_exp{2.1, -1, 0.1, -1, 7.1, -1, 5.1, -1}; + + EXPECT_EQ(v_coalesced, v_coalesced_exp); + EXPECT_EQ(w_coalesced, w_coalesced_exp); + } +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphCoalesceDefragment) +{ + using namespace cugraph::experimental::detail; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + index_t num_paths = 4; + index_t max_depth = 3; + index_t total_sz = num_paths * max_depth; + + std::vector v_sizes{1, 2, 2, 1}; + vector_test_t d_sizes(num_paths, handle.get_stream()); + raft::update_device(d_sizes.data(), v_sizes.data(), d_sizes.size(), handle.get_stream()); + + std::vector v_coalesced(total_sz, -1); + v_coalesced[0] = 3; + v_coalesced[max_depth] = 5; + v_coalesced[max_depth + 1] = 2; + v_coalesced[2 * max_depth] = 4; + v_coalesced[2 * max_depth + 1] = 0; + v_coalesced[3 * max_depth] = 1; + + std::vector w_coalesced(total_sz - num_paths, -1); + w_coalesced[max_depth - 1] = 10.1; + w_coalesced[2 * max_depth - 2] = 11.2; + + vector_test_t d_coalesced_v(total_sz, handle.get_stream()); + vector_test_t d_coalesced_w(total_sz - num_paths, handle.get_stream()); + + raft::update_device( + d_coalesced_v.data(), v_coalesced.data(), d_coalesced_v.size(), handle.get_stream()); + raft::update_device( + d_coalesced_w.data(), w_coalesced.data(), d_coalesced_w.size(), handle.get_stream()); + + random_walker_t rand_walker{handle, graph_view, num_paths, max_depth}; + + rand_walker.stop(d_coalesced_v, d_coalesced_w, d_sizes); + + // check vertex/weight defragment: 
+ // + { + v_coalesced.resize(d_coalesced_v.size()); + w_coalesced.resize(d_coalesced_w.size()); + + raft::update_host( + v_coalesced.data(), raw_const_ptr(d_coalesced_v), d_coalesced_v.size(), handle.get_stream()); + raft::update_host( + w_coalesced.data(), raw_const_ptr(d_coalesced_w), d_coalesced_w.size(), handle.get_stream()); + + std::vector v_coalesced_exp{3, 5, 2, 4, 0, 1}; + std::vector w_coalesced_exp{10.1, 11.2}; + + EXPECT_EQ(v_coalesced, v_coalesced_exp); + EXPECT_EQ(w_coalesced, w_coalesced_exp); + } +} + +TEST_F(RandomWalksPrimsTest, SimpleGraphRandomWalk) +{ + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + + auto graph_view = graph.view(); + + edge_t const* offsets = graph_view.offsets(); + vertex_t const* indices = graph_view.indices(); + weight_t const* values = graph_view.weights(); + + std::vector v_ro(num_vertices + 1); + std::vector v_ci(num_edges); + std::vector v_vals(num_edges); + + raft::update_host(v_ro.data(), offsets, v_ro.size(), handle.get_stream()); + raft::update_host(v_ci.data(), indices, v_ci.size(), handle.get_stream()); + raft::update_host(v_vals.data(), values, v_vals.size(), handle.get_stream()); + + std::vector v_start{1, 0, 4, 2}; + vector_test_t d_v_start(v_start.size(), handle.get_stream()); + raft::update_device(d_v_start.data(), v_start.data(), d_v_start.size(), handle.get_stream()); + + index_t num_paths = v_start.size(); + index_t max_depth = 5; + + // 0-copy const device view: + // + detail::device_const_vector_view d_start_view{d_v_start.data(), num_paths}; + auto quad = detail::random_walks_impl(handle, graph_view, d_start_view, max_depth); + + auto& d_coalesced_v = std::get<0>(quad); + auto& d_coalesced_w = std::get<1>(quad); + auto& d_sizes = std::get<2>(quad); + auto seed0 = std::get<3>(quad); + + bool test_all_paths = + cugraph::test::host_check_rw_paths(handle, graph_view, d_coalesced_v, d_coalesced_w, d_sizes); + + if (!test_all_paths) std::cout << "starting seed on failure: " << seed0 << '\n'; + + ASSERT_TRUE(test_all_paths); +} From 7a2b02b4a867d7b18b5323c31c4dfb8030f095ab Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Wed, 31 Mar 2021 14:14:23 -0500 Subject: [PATCH 36/51] Multiple graph generator with power law distribution on sizes (#1483) Add a function that leverages the RMAT generator and creates k graphs with a power-law or uniform distribution of graph sizes. 
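For reference, a minimal usage sketch of the new API (illustrative only — it assumes `vertex_t = int32_t`, and the include lines and the wrapper function name are placeholders; the actual signature is the one added in `cpp/include/experimental/graph_generator.hpp` below):

```cpp
#include <experimental/graph_generator.hpp>  // header added in this PR; path is indicative
#include <raft/handle.hpp>

#include <tuple>

void generate_example_graphs(raft::handle_t const& handle)
{
  using namespace cugraph::experimental;

  // 8 edge lists, scales drawn from [3, 16] with a power-law size distribution,
  // roughly 32 edges per vertex, default (power-law) R-MAT a/b/c parameters.
  auto edgelists = generate_rmat_edgelists<int32_t>(handle,
                                                    8,   // n_edgelists
                                                    3,   // min_scale
                                                    16,  // max_scale
                                                    32,  // edge_factor
                                                    generator_distribution_t::POWER_LAW,
                                                    generator_distribution_t::POWER_LAW,
                                                    0);  // seed

  for (auto& edgelist : edgelists) {
    auto& srcs = std::get<0>(edgelist);  // rmm::device_uvector of source vertex IDs
    auto& dsts = std::get<1>(edgelist);  // rmm::device_uvector of destination vertex IDs
    // ... construct a graph from each (srcs, dsts) pair ...
  }
}
```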
Closes #1458
Consider adding bindings for this as part of #1473

Authors:
  - Alex Fender (https://github.com/afender)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)
  - Brad Rees (https://github.com/BradReesWork)

URL: https://github.com/rapidsai/cugraph/pull/1483
---
 cpp/include/experimental/graph_generator.hpp | 55 +++++++++++-
 .../experimental/generate_rmat_edgelist.cu | 78 ++++++++++++++++-
 cpp/tests/experimental/generate_rmat_test.cpp | 86 +++++++++++++++++++
 3 files changed, 217 insertions(+), 2 deletions(-)

diff --git a/cpp/include/experimental/graph_generator.hpp b/cpp/include/experimental/graph_generator.hpp
index b8495ed7581..bc7337944f3 100644
--- a/cpp/include/experimental/graph_generator.hpp
+++ b/cpp/include/experimental/graph_generator.hpp
@@ -72,7 +72,7 @@ template
 std::tuple, rmm::device_uvector> generate_rmat_edgelist(
   raft::handle_t const& handle,
   size_t scale,
-  size_t edge_factor = 16,
+  size_t num_edges,
   double a = 0.57,
   double b = 0.19,
   double c = 0.19,
@@ -80,5 +80,58 @@ std::tuple, rmm::device_uvector> generat
   bool clip_and_flip = false,
   bool scramble_vertex_ids = false);
+enum class generator_distribution_t { POWER_LAW = 0, UNIFORM };
+
+/**
+ * @brief generate multiple edge lists using the R-MAT graph generator.
+ *
+ * This function allows multi-edges and self-loops similar to the Graph 500 reference
+ * implementation.
+ *
+ * @p scramble_vertex_ids needs to be set to `true` to generate a graph conforming to the Graph 500
+ * specification (note that scrambling does not affect cuGraph's graph construction performance, so
+ * this is generally unnecessary). If `edge_factor` is given (e.g. Graph 500), set @p num_edges to
+ * (size_t{1} << @p scale) * `edge_factor`. To generate an undirected graph, set @p b == @p c and @p
+ * clip_and_flip = true. All the resulting edges will be placed in the lower triangular part
+ * (including the diagonal) of the graph adjacency matrix.
+ *
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param n_edgelists Number of edge lists (graphs) to generate
+ * @param min_scale Scale factor to set the minimum number of vertices in the graph.
+ * @param max_scale Scale factor to set the maximum number of vertices in the graph.
+ * @param edge_factor Average number of edges per vertex to generate.
+ * @param size_distribution Distribution of the graph sizes, impacts the scale parameter of the
+ * R-MAT generator
+ * @param edge_distribution Edge distribution for each graph, impacts how the R-MAT parameters a, b, c, d
+ * are set.
+ * @param seed Seed value for the random number generator.
+ * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part
+ * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to
+ * `false`).
+ * @param scramble_vertex_ids Flag controlling whether to scramble vertex ID bits (if set to `true`)
+ * or not (if set to `false`); scrambling vertex ID bits breaks correlation between vertex ID values
+ * and vertex degrees. The scramble code here follows the algorithm in the Graph 500 reference
+ * implementation version 3.0.0.
+ * @return A vector of std::tuple, rmm::device_uvector> of
+ *size @p n_edgelists, each vector element being a tuple of rmm::device_uvector objects for edge
+ *source vertex IDs and edge destination vertex IDs.
+ */ +template +std::vector, rmm::device_uvector>> +generate_rmat_edgelists( + raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor = 16, + generator_distribution_t size_distribution = generator_distribution_t::POWER_LAW, + generator_distribution_t edge_distribution = generator_distribution_t::POWER_LAW, + uint64_t seed = 0, + bool clip_and_flip = false, + bool scramble_vertex_ids = false); + } // namespace experimental } // namespace cugraph diff --git a/cpp/src/experimental/generate_rmat_edgelist.cu b/cpp/src/experimental/generate_rmat_edgelist.cu index 0a6d666432f..185fa837a70 100644 --- a/cpp/src/experimental/generate_rmat_edgelist.cu +++ b/cpp/src/experimental/generate_rmat_edgelist.cu @@ -27,7 +27,9 @@ #include #include +#include #include +#include "rmm/detail/error.hpp" namespace cugraph { namespace experimental { @@ -121,7 +123,57 @@ std::tuple, rmm::device_uvector> generat return std::make_tuple(std::move(srcs), std::move(dsts)); } -// explicit instantiation +template +std::vector, rmm::device_uvector>> +generate_rmat_edgelists(raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + generator_distribution_t component_distribution, + generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids) +{ + CUGRAPH_EXPECTS(min_scale > 0, "minimum graph scale is 1."); + CUGRAPH_EXPECTS(size_t{1} << max_scale <= std::numeric_limits::max(), + "Invalid input argument: scale too large for vertex_t."); + + std::vector, rmm::device_uvector>> output{}; + output.reserve(n_edgelists); + std::vector scale(n_edgelists); + + std::default_random_engine eng; + eng.seed(seed); + if (component_distribution == generator_distribution_t::UNIFORM) { + std::uniform_int_distribution dist(min_scale, max_scale); + std::generate(scale.begin(), scale.end(), [&dist, &eng]() { return dist(eng); }); + } else { + // May expose this as a parameter in the future + std::exponential_distribution dist(4); + // The modulo is here to protect the range because exponential distribution is defined on + // [0,infinity). 
With exponent 4 most values are between 0 and 1 + auto range = max_scale - min_scale; + std::generate(scale.begin(), scale.end(), [&dist, &eng, &min_scale, &range]() { + return min_scale + static_cast(static_cast(range) * dist(eng)) % range; + }); + } + + // intialized to standard powerlaw values + double a = 0.57, b = 0.19, c = 0.19; + if (edge_distribution == generator_distribution_t::UNIFORM) { + a = 0.25; + b = a; + c = a; + } + + for (size_t i = 0; i < n_edgelists; i++) { + output.push_back(generate_rmat_edgelist( + handle, scale[i], scale[i] * edge_factor, a, b, c, i, clip_and_flip, scramble_vertex_ids)); + } + return output; +} template std::tuple, rmm::device_uvector> generate_rmat_edgelist(raft::handle_t const& handle, @@ -145,5 +197,29 @@ generate_rmat_edgelist(raft::handle_t const& handle, bool clip_and_flip, bool scramble_vertex_ids); +template std::vector, rmm::device_uvector>> +generate_rmat_edgelists(raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + generator_distribution_t component_distribution, + generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + +template std::vector, rmm::device_uvector>> +generate_rmat_edgelists(raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + generator_distribution_t component_distribution, + generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + } // namespace experimental } // namespace cugraph diff --git a/cpp/tests/experimental/generate_rmat_test.cpp b/cpp/tests/experimental/generate_rmat_test.cpp index 249a1a3c6c8..666106d62ca 100644 --- a/cpp/tests/experimental/generate_rmat_test.cpp +++ b/cpp/tests/experimental/generate_rmat_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include @@ -281,5 +282,90 @@ INSTANTIATE_TEST_CASE_P(simple_test, GenerateRmat_Usecase(20, 16, 0.57, 0.19, 0.19, false), GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, true), GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, false))); +typedef struct GenerateRmats_Usecase_t { + size_t n_edgelists{0}; + size_t min_scale{0}; + size_t max_scale{0}; + size_t edge_factor{0}; + cugraph::experimental::generator_distribution_t component_distribution; + cugraph::experimental::generator_distribution_t edge_distribution; + + GenerateRmats_Usecase_t(size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + cugraph::experimental::generator_distribution_t component_distribution, + cugraph::experimental::generator_distribution_t edge_distribution) + : n_edgelists(n_edgelists), + min_scale(min_scale), + max_scale(max_scale), + component_distribution(component_distribution), + edge_distribution(edge_distribution), + edge_factor(edge_factor){}; +} GenerateRmats_Usecase; +class Tests_GenerateRmats : public ::testing::TestWithParam { + public: + Tests_GenerateRmats() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + template + void run_current_test(GenerateRmats_Usecase const& configuration) + { + raft::handle_t handle{}; + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + auto outputs = + cugraph::experimental::generate_rmat_edgelists(handle, + configuration.n_edgelists, + configuration.min_scale, + configuration.max_scale, + configuration.edge_factor, + configuration.component_distribution, + configuration.edge_distribution, + uint64_t{0}); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + ASSERT_EQ(configuration.n_edgelists, outputs.size()); + for (auto i = outputs.begin(); i != outputs.end(); ++i) { + ASSERT_EQ(std::get<0>(*i).size(), std::get<1>(*i).size()); + ASSERT_TRUE((configuration.min_scale * configuration.edge_factor) <= std::get<0>(*i).size()); + ASSERT_TRUE((configuration.max_scale * configuration.edge_factor) >= std::get<0>(*i).size()); + } + } +}; +TEST_P(Tests_GenerateRmats, CheckInt32) { run_current_test(GetParam()); } + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_GenerateRmats, + ::testing::Values( + GenerateRmats_Usecase(8, + 1, + 16, + 32, + cugraph::experimental::generator_distribution_t::UNIFORM, + cugraph::experimental::generator_distribution_t::UNIFORM), + GenerateRmats_Usecase(8, + 1, + 16, + 32, + cugraph::experimental::generator_distribution_t::UNIFORM, + cugraph::experimental::generator_distribution_t::POWER_LAW), + GenerateRmats_Usecase(8, + 3, + 16, + 32, + cugraph::experimental::generator_distribution_t::POWER_LAW, + cugraph::experimental::generator_distribution_t::UNIFORM), + GenerateRmats_Usecase(8, + 3, + 16, + 32, + cugraph::experimental::generator_distribution_t::POWER_LAW, + cugraph::experimental::generator_distribution_t::POWER_LAW))); CUGRAPH_TEST_PROGRAM_MAIN() From daa96221a9b32728897b9ed9785bf91195665e46 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Wed, 31 Mar 2021 16:10:03 -0400 Subject: [PATCH 37/51] Create C++ documentation (#1489) Improve the C++ documentation for the following algorithms: Pagerank, SSSP, BFS, and Louvain. We need, generally, to improve the C++ documentation for cuGraph. 
This is intended to improve the documentation for the algorithms that have been adapted to use the new graph primitives and will hopefully provide a construct that we can use for updating other C++ documentation. As we migrate new algorithms to use the new graph primitives over the next several releases we will create documentation for those algorithms as well. Closes #1490 Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Rick Ratzel (https://github.com/rlratzel) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1489 --- README.md | 10 ++--- cpp/src/centrality/README.md | 81 ++++++++++++++++++++++++++++++++++++ cpp/src/community/README.md | 79 +++++++++++++++++++++++++++++++++++ cpp/src/traversal/README.md | 56 +++++++++++++++++++++++++ 4 files changed, 221 insertions(+), 5 deletions(-) create mode 100644 cpp/src/centrality/README.md create mode 100644 cpp/src/community/README.md create mode 100644 cpp/src/traversal/README.md diff --git a/README.md b/README.md index 77377fe2bbc..4bdbcd00280 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ As of Release 0.18 - including 0.18 nightly | Community | | | | | | EgoNet | Single-GPU | | | | Leiden | Single-GPU | | -| | Louvain | Multi-GPU | | +| | Louvain | Multi-GPU | [C++ README](cpp/src/community/README.md#Louvain) | | | Ensemble Clustering for Graphs | Single-GPU | | | | Spectral-Clustering - Balanced Cut | Single-GPU | | | | Spectral-Clustering - Modularity | Single-GPU | | @@ -71,16 +71,16 @@ As of Release 0.18 - including 0.18 nightly | Linear Assignment| | | | | | Hungarian | Single-GPU | [README](cpp/src/linear_assignment/README-hungarian.md) | | Link Analysis| | | | -| | Pagerank | Multi-GPU | | -| | Personal Pagerank | Multi-GPU | | +| | Pagerank | Multi-GPU | [C++ README](cpp/src/centrality/README.md#Pagerank) | +| | Personal Pagerank | Multi-GPU | [C++ README](cpp/src/centrality/README.md#Personalized-Pagerank) | | | HITS | Single-GPU | leverages Gunrock | | Link Prediction | | | | | | Jaccard Similarity | Single-GPU | | | | Weighted Jaccard Similarity | Single-GPU | | | | Overlap Similarity | Single-GPU | | | Traversal | | | | -| | Breadth First Search (BFS) | Multi-GPU | with cutoff support | -| | Single Source Shortest Path (SSSP) | Multi-GPU | | +| | Breadth First Search (BFS) | Multi-GPU | with cutoff support
[C++ README](cpp/src/traversal/README.md#BFS) | +| | Single Source Shortest Path (SSSP) | Multi-GPU | [C++ README](cpp/src/traversal/README.md#SSSP) | | | Traveling Salesperson Problem (TSP) | Single-GPU | | | Structure | | | | | | Renumbering | Single-GPU | multiple columns, any data type | diff --git a/cpp/src/centrality/README.md b/cpp/src/centrality/README.md new file mode 100644 index 00000000000..db7838fb0cc --- /dev/null +++ b/cpp/src/centrality/README.md @@ -0,0 +1,81 @@ +# Centrality algorithms +cuGraph Pagerank is implemented using our graph primitive library + +## Pagerank + +The unit test code is the best place to search for examples on calling pagerank. + + * [SG Implementation](../../tests/experimental/pagerank_test.cpp) + * [MG Implementation](../../tests/pagerank/mg_pagerank_test.cpp) + +## Simple pagerank + +The example assumes that you create an SG or MG graph somehow. The caller must create the pageranks vector in device memory and pass in the raw pointer to that vector into the pagerank function. + +```cpp +#include +... +using vertex_t = int32_t; // or int64_t, whichever is appropriate +using weight_t = float; // or double, whichever is appropriate +using result_t = weight_t; // could specify float or double also +raft::handle_t handle; // Must be configured if MG +auto graph_view = graph.view(); // assumes you have created a graph somehow + +result_t constexpr alpha{0.85}; +result_t constexpr epsilon{1e-6}; + +rmm::device_uvector pageranks_v(graph_view.get_number_of_vertices(), handle.get_stream()); + +// pagerank optionally supports three additional parameters: +// max_iterations - maximum number of iterations, if pagerank doesn't coverge by +// then we abort +// has_initial_guess - if true, values in the pagerank array when the call is initiated +// will be used as the initial pagerank values. These values will +// be normalized before use. If false (the default), the values +// in the pagerank array will be set to 1/num_vertices before +// starting the computation. +// do_expensive_check - perform extensive validation of the input data before +// executing algorithm. Off by default. Note: turning this on +// is expensive +cugraph::experimental::pagerank(handle, graph_view, nullptr, nullptr, nullptr, vertex_t{0}, + pageranks_v.data(), alpha, epsilon); +``` + +## Personalized Pagerank + +The example assumes that you create an SG or MG graph somehow. The caller must create the pageranks vector in device memory and pass in the raw pointer to that vector into the pagerank function. Additionally, the caller must create personalization_vertices and personalized_values vectors in device memory, populate them and pass in the raw pointers to those vectors. + +```cpp +#include +... 
+using vertex_t = int32_t; // or int64_t, whichever is appropriate +using weight_t = float; // or double, whichever is appropriate +using result_t = weight_t; // could specify float or double also +raft::handle_t handle; // Must be configured if MG +auto graph_view = graph.view(); // assumes you have created a graph somehow +vertex_t number_of_personalization_vertices; // Provided by caller + +result_t constexpr alpha{0.85}; +result_t constexpr epsilon{1e-6}; + +rmm::device_uvector pageranks_v(graph_view.get_number_of_vertices(), handle.get_stream()); +rmm::device_uvector personalization_vertices(number_of_personalization_vertices, handle.get_stream()); +rmm::device_uvector personalization_values(number_of_personalization_vertices, handle.get_stream()); + +// Populate personalization_vertices, personalization_values with user provided data + +// pagerank optionally supports three additional parameters: +// max_iterations - maximum number of iterations, if pagerank doesn't coverge by +// then we abort +// has_initial_guess - if true, values in the pagerank array when the call is initiated +// will be used as the initial pagerank values. These values will +// be normalized before use. If false (the default), the values +// in the pagerank array will be set to 1/num_vertices before +// starting the computation. +// do_expensive_check - perform extensive validation of the input data before +// executing algorithm. Off by default. Note: turning this on +// is expensive +cugraph::experimental::pagerank(handle, graph_view, nullptr, personalization_vertices.data(), + personalization_values.data(), number_of_personalization_vertices, + pageranks_v.data(), alpha, epsilon); +``` diff --git a/cpp/src/community/README.md b/cpp/src/community/README.md new file mode 100644 index 00000000000..4bff0a6e77e --- /dev/null +++ b/cpp/src/community/README.md @@ -0,0 +1,79 @@ +# Louvain and Related Clustering Algorithms +cuGraph contains a GPU implementation of the Louvain algorithm and several related clustering algorithms (Leiden and ECG). + +## Louvain + +The Louvain implementation is designed to assign clusters attempting to optimize modularity. The algorithm is derived from the serial implementation described in the following paper: + + * VD Blondel, J-L Guillaume, R Lambiotte and E Lefebvre: Fast unfolding of community hierarchies in large networks, J Stat Mech P10008 (2008), http://arxiv.org/abs/0803.0476 + +It leverages some parallelism ideas from the following paper: + * Hao Lu, Mahantesh Halappanavar, Ananth Kalyanaraman: Parallel heuristics for scalable community detection, Elsevier Parallel Computing (2015), https://www.sciencedirect.com/science/article/pii/S0167819115000472 + + +The challenge in parallelizing Louvain lies in the primary loop which visits the vertices in serial. For each vertex v the change in modularity is computed for moving the vertex from its currently assigned cluster to each of the clusters to which v's neighbors are assigned. The largest positive delta modularity is used to select a new cluster (if there are no positive delta modularities then the vertex is not moved). If the vertex v is moved to a new cluster then the statistics of the vertex v's old cluster and new cluster change. This change in cluster statistics may affect the delta modularity computations of all vertices that follow vertex v in the serial iteration, creating a dependency between the different iterations of the loop. 
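+
+For reference, the quantity being compared for each candidate move is the modularity gain described in the Blondel et al. paper. The helper below is purely illustrative (it is not the cuGraph implementation); the variable names follow the notation of that paper:
+
+```cpp
+#include <cmath>
+
+// Modularity gain for moving an isolated vertex i into community C.
+// sum_in  : sum of the weights of the edges inside C
+// sum_tot : sum of the weights of the edges incident to vertices in C
+// k_i     : weighted degree of vertex i
+// k_i_in  : sum of the weights of the edges from i to vertices in C
+// m       : total edge weight of the graph
+double delta_modularity(double sum_in, double sum_tot, double k_i, double k_i_in, double m)
+{
+  double after  = (sum_in + 2.0 * k_i_in) / (2.0 * m) - std::pow((sum_tot + k_i) / (2.0 * m), 2);
+  double before = sum_in / (2.0 * m) - std::pow(sum_tot / (2.0 * m), 2) - std::pow(k_i / (2.0 * m), 2);
+  return after - before;
+}
+```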
+ +In order to make efficient use of the GPU parallelism, the cuGraph implementation computes the delta modularity for *all* vertex/neighbor pairs using the *current* vertex assignment. Decisions on moving vertices will be made based upon these delta modularities. This will potentially make choices that the serial version would not make. In order to minimize some of the negative effects of this (as described in the Lu paper), the cuGraph implementation uses an Up/Down technique. In even numbered iterations a vertex can only move from cluster i to cluster j if i > j; in odd numbered iterations a vertex can only move from cluster i to cluster j if i < j. This prevents two vertices from swapping clusters in the same iteration of the loop. We have had great success in converging on high modularity clustering using this technique. + +## Calling Louvain + +The unit test code is the best place to search for examples on calling louvain. + + * [SG Implementation](../../tests/community/louvain_test.cpp) + * [MG Implementation](../../tests/community/mg_louvain_test.cpp) + +The API itself is very simple. There are two variations: + * Return a flat clustering + * Return a Dendrogram + +### Return a flat clustering + +The example assumes that you create an SG or MG graph somehow. The caller must create the clustering vector in device memory and pass in the raw pointer to that vector into the louvain function. + +```cpp +#include +... +using vertex_t = int32_t; // or int64_t, whichever is appropriate +using weight_t = float; // or double, whichever is appropriate +raft::handle_t handle; // Must be configured if MG +auto graph_view = graph.view(); // assumes you have created a graph somehow + +size_t level; +weight_t modularity; + +rmm::device_uvector clustering_v(graph_view.get_number_of_vertices(), handle.get_stream()); + +// louvain optionally supports two additional parameters: +// max_level - maximum level of the Dendrogram +// resolution - constant in the modularity computation +std::tie(level, modularity) = cugraph::louvain(handle, graph_view, clustering_v.data()); +``` + +### Return a Dendrogram + +The Dendrogram represents the levels of hierarchical clustering that the Louvain algorithm computes. There is a separate function that will flatten the clustering into the same result as above. Returning the Dendrogram, however, provides a finer level of detail on the intermediate results which can be helpful in more fully understanding the data. + +```cpp +#include +... 
+using vertex_t = int32_t; // or int64_t, whichever is appropriate +using weight_t = float; // or double, whichever is appropriate +raft::handle_t handle; // Must be configured if MG +auto graph_view = graph.view(); // assumes you have created a graph somehow + +cugraph::Dendrogram dendrogram; +weight_t modularity; + +// louvain optionally supports two additional parameters: +// max_level - maximum level of the Dendrogram +// resolution - constant in the modularity computation +std::tie(dendrogram, modularity) = cugraph::louvain(handle, graph_view); + +// This will get the equivalent result to the earlier example +rmm::device_uvector clustering_v(graph_view.get_number_of_vertices(), handle.get_stream()); +cugraph::flatten_dendrogram(handle, graph_view, dendrogram, clustering.data()); +``` + +## Leiden + +## ECG diff --git a/cpp/src/traversal/README.md b/cpp/src/traversal/README.md new file mode 100644 index 00000000000..7f436926de8 --- /dev/null +++ b/cpp/src/traversal/README.md @@ -0,0 +1,56 @@ +# Traversal +cuGraph traversal algorithms are contained in this directory + +## SSSP + +The unit test code is the best place to search for examples on calling SSSP. + + * [SG Implementation](../../tests/experimental/sssp_test.cpp) + * MG Implementation - TBD + +## Simple SSSP + +The example assumes that you create an SG or MG graph somehow. The caller must create the distances and predecessors vectors in device memory and pass in the raw pointers to those vectors into the SSSP function. + +```cpp +#include +... +using vertex_t = int32_t; // or int64_t, whichever is appropriate +using weight_t = float; // or double, whichever is appropriate +using result_t = weight_t; // could specify float or double also +raft::handle_t handle; // Must be configured if MG +auto graph_view = graph.view(); // assumes you have created a graph somehow +vertex_t source; // Initialized by user + +rmm::device_uvector distances_v(graph_view.get_number_of_vertices(), handle.get_stream()); +rmm::device_uvector predecessors_v(graph_view.get_number_of_vertices(), handle.get_stream()); + +cugraph::experimental::sssp(handle, graph_view, distances_v.begin(), predecessors_v.begin(), source, std::numeric_limits::max(), false); +``` + +## BFS + +The unit test code is the best place to search for examples on calling BFS. + + * [SG Implementation](../../tests/experimental/bfs_test.cpp) + * MG Implementation - TBD + +## Simple BFS + +The example assumes that you create an SG or MG graph somehow. The caller must create the distances and predecessors vectors in device memory and pass in the raw pointers to those vectors into the BFS function. + +```cpp +#include +... 
+using vertex_t = int32_t; // or int64_t, whichever is appropriate +using weight_t = float; // or double, whichever is appropriate +using result_t = weight_t; // could specify float or double also +raft::handle_t handle; // Must be configured if MG +auto graph_view = graph.view(); // assumes you have created a graph somehow +vertex_t source; // Initialized by user + +rmm::device_uvector distances_v(graph_view.get_number_of_vertices(), handle.get_stream()); +rmm::device_uvector predecessors_v(graph_view.get_number_of_vertices(), handle.get_stream()); + +cugraph::experimental::bfs(handle, graph_view, d_distances.begin(), d_predecessors.begin(), source, false, std::numeric_limits::max(), false); +``` From 59f6df62badb2f85c236d207ba0fbaab236079d4 Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Wed, 31 Mar 2021 18:02:37 -0400 Subject: [PATCH 38/51] Revert "Update conda recipes pinning of repo dependencies" (#1493) Reverts rapidsai/cugraph#1485 Authors: - Ray Douglass (https://github.com/raydouglass) Approvers: - Mike Wendt (https://github.com/mike-wendt) URL: https://github.com/rapidsai/cugraph/pull/1493 --- conda/recipes/cugraph/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index 4b845583181..1ef64ddbe72 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -25,13 +25,13 @@ requirements: build: - python x.x - cython>=0.29,<0.30 - - libcugraph={{ version }}=*_{{ GIT_DESCRIBE_NUMBER }} + - libcugraph={{ version }} - cudf={{ minor_version }} - ucx-py {{ minor_version }} - ucx-proc=*=gpu run: - python x.x - - libcugraph={{ version }}=*_{{ GIT_DESCRIBE_NUMBER }} + - libcugraph={{ version }} - cudf={{ minor_version }} - dask-cudf {{ minor_version }} - dask-cuda {{ minor_version }} From a7b634286a6bdb711e03ca1eefb90e88db513ab6 Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Fri, 2 Apr 2021 12:31:25 -0500 Subject: [PATCH 39/51] Fix MNMG test failures and skip tests that are not supported on Pascal (#1498) * Made various fixes to `test_mg_betweenness_centrality.py` to address recent updates that were breaking these tests. * @afender updated egonet.cu to address an occasional test failure related to mem usage in certain environments. * @rlratzel updated python and notebook tests that contain code not supported on Pascal to be skipped when running on that arch. Also added a script to detect Pascal and used it to skip **all** C++ tests. 
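As a point of comparison, a per-test guard along the lines of the note below might look like this (hypothetical sketch only, not part of this PR; it assumes a GoogleTest version that provides `GTEST_SKIP`, and the test name is made up):

```cpp
#include <gtest/gtest.h>

#include <cuda_runtime_api.h>

TEST(HypotheticalCugraphTest, RunsOnlyOnVoltaOrNewer)
{
  // Query the compute capability of the current device and skip on Pascal or older.
  cudaDeviceProp prop{};
  ASSERT_EQ(cudaSuccess, cudaGetDeviceProperties(&prop, 0));
  if (prop.major < 7) { GTEST_SKIP() << "not supported on Pascal (or older) GPU architectures"; }

  // ... actual test body ...
}
```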
_Note: a better way would be to edit each C++ test to use GTEST_SKIP to conditionally skip on Pascal, but since there are now so many to skip, and we get coverage from Python, this was the faster solution without sacrificing much/any coverage for a single platform._ Authors: - Joseph Nke (https://github.com/jnke2016) - Alex Fender (https://github.com/afender) - Rick Ratzel (https://github.com/rlratzel) Approvers: - Alex Fender (https://github.com/afender) - Rick Ratzel (https://github.com/rlratzel) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1498 --- benchmarks/bench_algos.py | 32 +++++ ci/test.sh | 18 ++- ci/utils/is_pascal.py | 39 ++++++ cpp/src/community/egonet.cu | 6 +- notebooks/centrality/Betweenness.ipynb | 1 + notebooks/centrality/Katz.ipynb | 1 + notebooks/community/Spectral-Clustering.ipynb | 1 + notebooks/community/Triangle-Counting.ipynb | 1 + .../components/ConnectedComponents.ipynb | 1 + notebooks/cores/core-number.ipynb | 1 + notebooks/cores/kcore.ipynb | 1 + notebooks/cores/ktruss.ipynb | 1 + notebooks/link_analysis/HITS.ipynb | 1 + .../link_prediction/Jaccard-Similarity.ipynb | 1 + .../link_prediction/Overlap-Similarity.ipynb | 1 + notebooks/structure/Renumber-2.ipynb | 1 + notebooks/structure/Renumber.ipynb | 1 + notebooks/structure/Symmetrize.ipynb | 1 + notebooks/traversal/SSSP.ipynb | 1 + .../test_mg_batch_betweenness_centrality.py | 5 +- python/cugraph/tests/test_balanced_cut.py | 12 +- .../tests/test_betweenness_centrality.py | 124 ++++++++++-------- python/cugraph/tests/test_bfs.py | 15 ++- python/cugraph/tests/test_connectivity.py | 24 +++- python/cugraph/tests/test_convert_matrix.py | 15 ++- python/cugraph/tests/test_core_number.py | 9 +- python/cugraph/tests/test_ecg.py | 3 + .../tests/test_edge_betweenness_centrality.py | 21 ++- python/cugraph/tests/test_egonet.py | 7 + .../cugraph/tests/test_filter_unreachable.py | 6 +- python/cugraph/tests/test_graph.py | 60 ++++++++- python/cugraph/tests/test_hits.py | 6 +- python/cugraph/tests/test_hypergraph.py | 68 +++++++++- python/cugraph/tests/test_jaccard.py | 16 +++ python/cugraph/tests/test_k_core.py | 9 +- python/cugraph/tests/test_k_truss_subgraph.py | 7 + python/cugraph/tests/test_katz_centrality.py | 9 +- .../tests/test_maximum_spanning_tree.py | 9 +- .../tests/test_minimum_spanning_tree.py | 9 +- python/cugraph/tests/test_modularity.py | 6 +- python/cugraph/tests/test_multigraph.py | 23 ++++ python/cugraph/tests/test_nx_convert.py | 13 +- python/cugraph/tests/test_overlap.py | 9 +- python/cugraph/tests/test_pagerank.py | 7 + python/cugraph/tests/test_paths.py | 41 +++++- python/cugraph/tests/test_renumber.py | 30 ++++- python/cugraph/tests/test_sssp.py | 21 ++- .../cugraph/tests/test_subgraph_extraction.py | 14 +- python/cugraph/tests/test_triangle_count.py | 12 +- python/cugraph/tests/test_utils.py | 7 + python/cugraph/tests/test_wjaccard.py | 6 +- python/cugraph/tests/test_woverlap.py | 7 +- 52 files changed, 633 insertions(+), 107 deletions(-) create mode 100644 ci/utils/is_pascal.py diff --git a/benchmarks/bench_algos.py b/benchmarks/bench_algos.py index f9f8bf9cf53..14c15ebc08c 100644 --- a/benchmarks/bench_algos.py +++ b/benchmarks/bench_algos.py @@ -162,6 +162,8 @@ def anyGraphWithTransposedAdjListComputed(request): ############################################################################### # Benchmarks @pytest.mark.ETL +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_create_graph(gpubenchmark, 
edgelistCreated): gpubenchmark(cugraph.from_cudf_edgelist, edgelistCreated, @@ -179,6 +181,8 @@ def bench_create_graph(gpubenchmark, edgelistCreated): warmup_iterations=10, max_time=0.005 ) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_create_digraph(gpubenchmark, edgelistCreated): gpubenchmark(cugraph.from_cudf_edgelist, edgelistCreated, @@ -188,27 +192,39 @@ def bench_create_digraph(gpubenchmark, edgelistCreated): @pytest.mark.ETL +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_renumber(gpubenchmark, edgelistCreated): gpubenchmark(NumberMap.renumber, edgelistCreated, "0", "1") +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_pagerank(gpubenchmark, anyGraphWithTransposedAdjListComputed): gpubenchmark(cugraph.pagerank, anyGraphWithTransposedAdjListComputed) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_bfs(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.bfs, anyGraphWithAdjListComputed, 0) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_force_atlas2(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.force_atlas2, anyGraphWithAdjListComputed, max_iter=50) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_sssp(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.sssp, anyGraphWithAdjListComputed, 0) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_jaccard(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.jaccard, graphWithAdjListComputed) @@ -219,20 +235,28 @@ def bench_louvain(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.louvain, graphWithAdjListComputed) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_weakly_connected_components(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.weakly_connected_components, anyGraphWithAdjListComputed) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_overlap(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.overlap, anyGraphWithAdjListComputed) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_triangles(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.triangles, graphWithAdjListComputed) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_spectralBalancedCutClustering(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.spectralBalancedCutClustering, @@ -247,19 +271,27 @@ def bench_spectralModularityMaximizationClustering( anyGraphWithAdjListComputed, 2) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_graph_degree(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(anyGraphWithAdjListComputed.degree) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_graph_degrees(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(anyGraphWithAdjListComputed.degrees) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_betweenness_centrality(gpubenchmark, 
anyGraphWithAdjListComputed): gpubenchmark(cugraph.betweenness_centrality, anyGraphWithAdjListComputed, k=10, seed=123) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_edge_betweenness_centrality(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.edge_betweenness_centrality, diff --git a/ci/test.sh b/ci/test.sh index 58cbb950f73..31660cd15ec 100755 --- a/ci/test.sh +++ b/ci/test.sh @@ -66,13 +66,17 @@ fi # EXITCODE for the script. set +e -echo "C++ gtests for cuGraph..." -for gt in tests/*_TEST; do - test_name=$(basename $gt) - echo "Running gtest $test_name" - ${gt} ${GTEST_FILTER} ${GTEST_ARGS} - echo "Ran gtest $test_name : return code was: $?, test script exit code is now: $EXITCODE" -done +if (python ${CUGRAPH_ROOT}/ci/utils/is_pascal.py); then + echo "WARNING: skipping C++ tests on Pascal GPU arch." +else + echo "C++ gtests for cuGraph..." + for gt in tests/*_TEST; do + test_name=$(basename $gt) + echo "Running gtest $test_name" + ${gt} ${GTEST_FILTER} ${GTEST_ARGS} + echo "Ran gtest $test_name : return code was: $?, test script exit code is now: $EXITCODE" + done +fi echo "Python pytest for cuGraph..." cd ${CUGRAPH_ROOT}/python diff --git a/ci/utils/is_pascal.py b/ci/utils/is_pascal.py new file mode 100644 index 00000000000..e55a3153a12 --- /dev/null +++ b/ci/utils/is_pascal.py @@ -0,0 +1,39 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import sys +import glob + +from numba import cuda + +# FIXME: consolidate this code with ci/gpu/notebook_list.py + +# +# Not strictly true... 
however what we mean is +# Pascal or earlier +# +pascal = False + +device = cuda.get_current_device() +# check for the attribute using both pre and post numba 0.53 names +cc = getattr(device, 'COMPUTE_CAPABILITY', None) or \ + getattr(device, 'compute_capability') +if (cc[0] < 7): + pascal = True + +# Return zero (success) if pascal is True +if pascal: + sys.exit(0) +else: + sys.exit(1) diff --git a/cpp/src/community/egonet.cu b/cpp/src/community/egonet.cu index 336a5c939b8..85ee327edb2 100644 --- a/cpp/src/community/egonet.cu +++ b/cpp/src/community/egonet.cu @@ -93,7 +93,6 @@ extract( hr_timer.start("ego_neighbors"); #endif -#pragma omp parallel for for (vertex_t i = 0; i < n_subgraphs; i++) { // get light handle from worker pool raft::handle_t light_handle(handle, i); @@ -152,8 +151,7 @@ extract( neighbors.resize(h_neighbors_offsets[n_subgraphs]); user_stream_view.synchronize(); -// Construct the neighboors list concurrently -#pragma omp parallel for + // Construct the neighboors list concurrently for (vertex_t i = 0; i < n_subgraphs; i++) { auto worker_stream_view = handle.get_internal_stream_view(i); thrust::copy(rmm::exec_policy(worker_stream_view), @@ -268,4 +266,4 @@ extract_ego(raft::handle_t const &, int64_t, int64_t); } // namespace experimental -} // namespace cugraph \ No newline at end of file +} // namespace cugraph diff --git a/notebooks/centrality/Betweenness.ipynb b/notebooks/centrality/Betweenness.ipynb index e4e33ef91e5..d748defe74c 100644 --- a/notebooks/centrality/Betweenness.ipynb +++ b/notebooks/centrality/Betweenness.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Betweenness Centrality\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will compute the Betweenness centrality for both vertices and edges in our test datase using cuGraph and NetworkX. The NetworkX and cuGraph processes will be interleaved so that each step can be compared.\n", "\n", diff --git a/notebooks/centrality/Katz.ipynb b/notebooks/centrality/Katz.ipynb index 2330fc08de8..cdf8828b80a 100755 --- a/notebooks/centrality/Katz.ipynb +++ b/notebooks/centrality/Katz.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Katz Centrality\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will compute the Katz centrality of each vertex in our test datase using both cuGraph and NetworkX. Additionally, NetworkX also contains a Numpy implementation that will used. The NetworkX and cuGraph processes will be interleaved so that each step can be compared.\n", "\n", diff --git a/notebooks/community/Spectral-Clustering.ipynb b/notebooks/community/Spectral-Clustering.ipynb index fcefae5eb60..a3aa538b062 100755 --- a/notebooks/community/Spectral-Clustering.ipynb +++ b/notebooks/community/Spectral-Clustering.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Spectral Clustering \n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will use cuGraph to identify the cluster in a test graph using Spectral Clustering with both the (A) Balance Cut metric, and (B) the Modularity Maximization metric\n", "\n", diff --git a/notebooks/community/Triangle-Counting.ipynb b/notebooks/community/Triangle-Counting.ipynb index 19d3f838fc6..796f6e59fe6 100755 --- a/notebooks/community/Triangle-Counting.ipynb +++ b/notebooks/community/Triangle-Counting.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Triangle Counting\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will count the numner of trianges in our test dataset. 
The NetworkX and cuGraph processes will be interleaved so that each step can be compared.\n", "\n", diff --git a/notebooks/components/ConnectedComponents.ipynb b/notebooks/components/ConnectedComponents.ipynb index a9c82e6669f..e9669d75b38 100755 --- a/notebooks/components/ConnectedComponents.ipynb +++ b/notebooks/components/ConnectedComponents.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Connected Components\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will use cuGraph to compute weakly and strongly connected components of a graph and display some useful information about the resulting components.\n", "\n", diff --git a/notebooks/cores/core-number.ipynb b/notebooks/cores/core-number.ipynb index 6190f653020..127898fb094 100755 --- a/notebooks/cores/core-number.ipynb +++ b/notebooks/cores/core-number.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Core Number\n", + "# Does not run on Pascal\n", "\n", "\n", "In this notebook, we will use cuGraph to compute the core number of every vertex in our test graph \n", diff --git a/notebooks/cores/kcore.ipynb b/notebooks/cores/kcore.ipynb index 342f4ecd5f7..250a1ea2aa5 100755 --- a/notebooks/cores/kcore.ipynb +++ b/notebooks/cores/kcore.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# K-Cores\n", + "# Does not run on Pascal\n", "\n", "\n", "In this notebook, we will use cuGraph to identify the K-Cores clusters in a test graph \n", diff --git a/notebooks/cores/ktruss.ipynb b/notebooks/cores/ktruss.ipynb index e6470110666..2fe93247d67 100644 --- a/notebooks/cores/ktruss.ipynb +++ b/notebooks/cores/ktruss.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# K-Truss\n", + "# Does not run on Pascal\n", "\n", "\n", "In this notebook, we will use cuGraph to identify the K-Truss clusters in a test graph \n", diff --git a/notebooks/link_analysis/HITS.ipynb b/notebooks/link_analysis/HITS.ipynb index 01fd22929d5..891133a277c 100755 --- a/notebooks/link_analysis/HITS.ipynb +++ b/notebooks/link_analysis/HITS.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# HITS\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will use both NetworkX and cuGraph to compute HITS. \n", "The NetworkX and cuGraph processes will be interleaved so that each step can be compared.\n", diff --git a/notebooks/link_prediction/Jaccard-Similarity.ipynb b/notebooks/link_prediction/Jaccard-Similarity.ipynb index 21835da1cce..9a53e559323 100755 --- a/notebooks/link_prediction/Jaccard-Similarity.ipynb +++ b/notebooks/link_prediction/Jaccard-Similarity.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Jaccard Similarity\n", + "# Does not run on Pascal\n", "----\n", "\n", "In this notebook we will explore the Jaccard vertex similarity metrics available in cuGraph. cuGraph supports:\n", diff --git a/notebooks/link_prediction/Overlap-Similarity.ipynb b/notebooks/link_prediction/Overlap-Similarity.ipynb index b8733ce4d80..ec02a8ebbea 100755 --- a/notebooks/link_prediction/Overlap-Similarity.ipynb +++ b/notebooks/link_prediction/Overlap-Similarity.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Overlap Similarity\n", + "# Does not run on Pascal\n", "----\n", "\n", "In this notebook we will explore the Overlap Coefficient and compare it again Jaccard. 
Similarity can be between neighboring vertices (default) or second hop neighbors\n", diff --git a/notebooks/structure/Renumber-2.ipynb b/notebooks/structure/Renumber-2.ipynb index d17c2b32191..aa923ba003f 100755 --- a/notebooks/structure/Renumber-2.ipynb +++ b/notebooks/structure/Renumber-2.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Renumber\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will use the _renumber_ function to compute new vertex IDs.\n", "\n", diff --git a/notebooks/structure/Renumber.ipynb b/notebooks/structure/Renumber.ipynb index 047b53d62df..2a2dab0a1a7 100755 --- a/notebooks/structure/Renumber.ipynb +++ b/notebooks/structure/Renumber.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Renumbering Test\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will use the _renumber_ function to compute new vertex IDs.\n", "\n", diff --git a/notebooks/structure/Symmetrize.ipynb b/notebooks/structure/Symmetrize.ipynb index 3cb84317742..5ba692b4696 100755 --- a/notebooks/structure/Symmetrize.ipynb +++ b/notebooks/structure/Symmetrize.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Symmetrize\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will use the _symmetrize_ function to create bi-directional edges in an undirected graph\n", "\n", diff --git a/notebooks/traversal/SSSP.ipynb b/notebooks/traversal/SSSP.ipynb index d2baeb12e74..abea30eba15 100755 --- a/notebooks/traversal/SSSP.ipynb +++ b/notebooks/traversal/SSSP.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Single Source Shortest Path (SSSP)\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will use cuGraph to compute the shortest path from a starting vertex to everyother vertex in our training dataset.\n", "\n", diff --git a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py index 4b0f6629bc3..6e1e5ea380a 100644 --- a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,6 +16,7 @@ from cugraph.tests.dask.mg_context import MGContext, skip_if_not_enough_devices from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.tests import utils # Get parameters from standard betwenness_centrality_test from cugraph.tests.test_betweenness_centrality import ( @@ -36,7 +37,7 @@ # ============================================================================= # Parameters # ============================================================================= -DATASETS = ["../datasets/karate.csv"] +DATASETS = [utils.DATASETS_UNDIRECTED[0]] MG_DEVICE_COUNT_OPTIONS = [pytest.param(1, marks=pytest.mark.preset_gpu_count), pytest.param(2, marks=pytest.mark.preset_gpu_count), pytest.param(3, marks=pytest.mark.preset_gpu_count), diff --git a/python/cugraph/tests/test_balanced_cut.py b/python/cugraph/tests/test_balanced_cut.py index f0fc7152e56..4a609e1ef13 100644 --- a/python/cugraph/tests/test_balanced_cut.py +++ b/python/cugraph/tests/test_balanced_cut.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -20,6 +20,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than def cugraph_call(G, partitions): @@ -59,6 +60,9 @@ def random_call(G, partitions): # Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering(graph_file, partitions): @@ -81,6 +85,9 @@ def test_edge_cut_clustering(graph_file, partitions): assert cu_score < rand_score +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering_with_edgevals(graph_file, partitions): @@ -123,6 +130,9 @@ def test_digraph_rejected(): cugraph_call(G, 2) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering_with_edgevals_nx(graph_file, partitions): diff --git a/python/cugraph/tests/test_betweenness_centrality.py b/python/cugraph/tests/test_betweenness_centrality.py index f338e5aa633..3177b78de47 100755 --- a/python/cugraph/tests/test_betweenness_centrality.py +++ b/python/cugraph/tests/test_betweenness_centrality.py @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import random import numpy as np import cudf @@ -55,7 +56,7 @@ # Comparison functions # ============================================================================= def calc_betweenness_centrality( - graph_obj_tuple, + graph_file, directed=True, k=None, normalized=False, @@ -68,49 +69,36 @@ def calc_betweenness_centrality( edgevals=False, ): """ Generate both cugraph and networkx betweenness centrality - Parameters ---------- graph_file : string Path to COO Graph representation in .csv format - directed : bool, optional, default=True - k : int or None, optional, default=None int: Number of sources to sample from None: All sources are used to compute - normalized : bool True: Normalize Betweenness Centrality scores False: Scores are left unnormalized - weight : cudf.DataFrame: Not supported as of 06/2020 - endpoints : bool True: Endpoints are included when computing scores False: Endpoints are not considered - seed : int or None, optional, default=None Seed for random sampling of the starting point - result_dtype : numpy.dtype Expected type of the result, either np.float32 or np.float64 - use_k_full : bool When True, if k is None replaces k by the number of sources of the Graph - multi_gpu_batch : bool When True, enable mg batch after constructing the graph - edgevals: bool When True, enable tests with weighted graph, should be ignored during computation. - Returns ------- - sorted_df : cudf.DataFrame Contains 'vertex' and 'cu_bc' 'ref_bc' columns, where 'cu_bc' and 'ref_bc' are the two betweenness centrality scores to compare. 
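Every Python-side skip added in this patch keys off `is_device_version_less_than((7, 0))`, and the new `ci/utils/is_pascal.py` script performs the equivalent compute-capability probe to gate the C++ gtest stage. As a rough, self-contained sketch of what such a check involves (the function name below is made up for illustration; this is not the cuGraph implementation), the same probe can be written with numba:

    # Sketch only: a standalone compute-capability check, assuming numba can
    # see a CUDA device. Not taken from cugraph.utilities.utils.
    from numba import cuda

    def device_version_less_than(min_cc):
        """Return True if the current GPU's compute capability is below
        min_cc, e.g. min_cc=(7, 0) treats Pascal (6.x) and older as too old."""
        device = cuda.get_current_device()
        # numba < 0.53 exposed COMPUTE_CAPABILITY; newer releases use
        # compute_capability, so try both (as ci/utils/is_pascal.py does).
        cc = getattr(device, "COMPUTE_CAPABILITY", None) or \
            getattr(device, "compute_capability")
        return tuple(cc) < tuple(min_cc)

With a helper like this, `device_version_less_than((7, 0))` evaluates to True on Pascal-class and older GPUs, which is the condition every skipif marker in these test modules checks.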
@@ -120,7 +108,8 @@ def calc_betweenness_centrality( G = None Gnx = None - G, Gnx = graph_obj_tuple + G, Gnx = utils.build_cu_and_nx_graphs(graph_file, directed=directed, + edgevals=edgevals) assert G is not None and Gnx is not None if multi_gpu_batch: @@ -298,67 +287,52 @@ def prepare_test(): gc.collect() -# ============================================================================= -# Pytest Fixtures -# ============================================================================= -DIRECTED = [pytest.param(d) for d in DIRECTED_GRAPH_OPTIONS] -DATASETS_SMALL = [pytest.param(d) for d in utils.DATASETS_SMALL] -DATASETS_UNRENUMBERED = [pytest.param(d) for d in utils.DATASETS_UNRENUMBERED] -WEIGHTED_GRAPH_OPTIONS = [pytest.param(w) for w in WEIGHTED_GRAPH_OPTIONS] - - -small_graph_fixture_params = utils.genFixtureParamsProduct( - (DATASETS_SMALL, "grph"), - (DIRECTED, "dirctd"), - (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) - -unrenumbered_graph_fixture_params = utils.genFixtureParamsProduct( - (DATASETS_UNRENUMBERED, "grph"), - (DIRECTED, "dirctd"), - (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) - - -@pytest.fixture(scope="module", params=small_graph_fixture_params) -def get_cu_nx_graph_datasets_small(request): - return utils.build_cu_and_nx_graphs(*request.param) - - -@pytest.fixture(scope="module", params=unrenumbered_graph_fixture_params) -def get_cu_nx_graph_datasets_unrenumbered(request): - return utils.build_cu_and_nx_graphs(*request.param) - - # ============================================================================= # Tests # ============================================================================= +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [None]) @pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) +@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_centrality( - get_cu_nx_graph_datasets_small, + graph_file, + directed, subset_size, normalized, weight, endpoints, subset_seed, result_dtype, + edgevals ): prepare_test() sorted_df = calc_betweenness_centrality( - get_cu_nx_graph_datasets_small, + graph_file, + directed=directed, normalized=normalized, k=subset_size, weight=weight, endpoints=endpoints, seed=subset_seed, result_dtype=result_dtype, + edgevals=edgevals, ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", [None]) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [None]) @@ -366,8 +340,10 @@ def test_betweenness_centrality( @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) @pytest.mark.parametrize("use_k_full", [True]) +@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_centrality_k_full( - get_cu_nx_graph_datasets_small, + graph_file, + directed, subset_size, 
normalized, weight, @@ -375,12 +351,14 @@ def test_betweenness_centrality_k_full( subset_seed, result_dtype, use_k_full, + edgevals ): """Tests full betweenness centrality by using k = G.number_of_vertices() instead of k=None, checks that k scales properly""" prepare_test() sorted_df = calc_betweenness_centrality( - get_cu_nx_graph_datasets_small, + graph_file, + directed=directed, normalized=normalized, k=subset_size, weight=weight, @@ -388,6 +366,7 @@ def test_betweenness_centrality_k_full( seed=subset_seed, result_dtype=result_dtype, use_k_full=use_k_full, + edgevals=edgevals ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") @@ -396,103 +375,134 @@ def test_betweenness_centrality_k_full( # the function operating the comparison inside is first proceeding # to a random sampling over the number of vertices (thus direct offsets) # in the graph structure instead of actual vertices identifiers +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [None]) @pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) @pytest.mark.parametrize("subset_seed", [None]) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) +@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_centrality_fixed_sample( - get_cu_nx_graph_datasets_unrenumbered, + graph_file, + directed, subset_size, normalized, weight, endpoints, subset_seed, result_dtype, + edgevals ): """Test Betweenness Centrality using a subset - Only k sources are considered for an approximate Betweenness Centrality """ prepare_test() sorted_df = calc_betweenness_centrality( - get_cu_nx_graph_datasets_unrenumbered, + graph_file, + directed=directed, k=subset_size, normalized=normalized, weight=weight, endpoints=endpoints, seed=subset_seed, result_dtype=result_dtype, + edgevals=edgevals ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [[]]) @pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) +@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_centrality_weight_except( - get_cu_nx_graph_datasets_small, + graph_file, + directed, subset_size, normalized, weight, endpoints, subset_seed, result_dtype, + edgevals ): """Calls betwenness_centrality with weight - As of 05/28/2020, weight is not supported and should raise a NotImplementedError """ prepare_test() with pytest.raises(NotImplementedError): sorted_df = calc_betweenness_centrality( - get_cu_nx_graph_datasets_small, + graph_file, + directed=directed, k=subset_size, normalized=normalized, weight=weight, endpoints=endpoints, seed=subset_seed, result_dtype=result_dtype, + edgevals=edgevals ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") +@pytest.mark.skipif( + 
is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("weight", [None]) @pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", [str]) +@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_invalid_dtype( - get_cu_nx_graph_datasets_small, + graph_file, + directed, subset_size, normalized, weight, endpoints, subset_seed, result_dtype, + edgevals ): """Test calls edge_betwenness_centrality an invalid type""" prepare_test() with pytest.raises(TypeError): sorted_df = calc_betweenness_centrality( - get_cu_nx_graph_datasets_small, + graph_file, + directed=directed, k=subset_size, normalized=normalized, weight=weight, endpoints=endpoints, seed=subset_seed, result_dtype=result_dtype, + edgevals=edgevals ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) diff --git a/python/cugraph/tests/test_bfs.py b/python/cugraph/tests/test_bfs.py index 0070a34248c..00996fd3bb3 100644 --- a/python/cugraph/tests/test_bfs.py +++ b/python/cugraph/tests/test_bfs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -19,6 +19,7 @@ import pytest import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import random # Temporarily suppress warnings till networkX fixes deprecation warnings @@ -440,6 +441,9 @@ def dataset_nxresults_allstartvertices_spc(small_dataset_nx_graph): # ============================================================================= # Tests # ============================================================================= +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_INPUT_TYPES) def test_bfs(gpubenchmark, dataset_nxresults_startvertex_spc, cugraph_input_type): @@ -467,6 +471,9 @@ def test_bfs(gpubenchmark, dataset_nxresults_startvertex_spc, ) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.NX_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_bfs_nonnative_inputs(gpubenchmark, @@ -477,6 +484,9 @@ def test_bfs_nonnative_inputs(gpubenchmark, cugraph_input_type) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_INPUT_TYPES) def test_bfs_spc_full(gpubenchmark, dataset_nxresults_allstartvertices_spc, cugraph_input_type): @@ -507,6 +517,9 @@ def test_bfs_spc_full(gpubenchmark, dataset_nxresults_allstartvertices_spc, ) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_scipy_api_compat(): graph_file = utils.DATASETS[0] diff --git a/python/cugraph/tests/test_connectivity.py b/python/cugraph/tests/test_connectivity.py index f957c4b417b..14572ab748d 100644 --- a/python/cugraph/tests/test_connectivity.py +++ b/python/cugraph/tests/test_connectivity.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -28,6 +28,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -290,6 +291,9 @@ def single_dataset_nxresults_strong(request): # ============================================================================= # Tests # ============================================================================= +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_DIR_INPUT_TYPES) def test_weak_cc(gpubenchmark, dataset_nxresults_weak, cugraph_input_type): (graph_file, netx_labels, @@ -329,6 +333,9 @@ def test_weak_cc(gpubenchmark, dataset_nxresults_weak, cugraph_input_type): assert nx_vertices == cg_vertices +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.NX_DIR_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_weak_cc_nonnative_inputs(gpubenchmark, @@ -339,6 +346,9 @@ def test_weak_cc_nonnative_inputs(gpubenchmark, cugraph_input_type) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_DIR_INPUT_TYPES) def test_strong_cc(gpubenchmark, dataset_nxresults_strong, cugraph_input_type): @@ -382,6 +392,9 @@ def test_strong_cc(gpubenchmark, dataset_nxresults_strong, assert nx_vertices == cg_vertices +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.NX_DIR_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_strong_cc_nonnative_inputs(gpubenchmark, @@ -392,16 +405,25 @@ def test_strong_cc_nonnative_inputs(gpubenchmark, cugraph_input_type) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_scipy_api_compat_weak(single_dataset_nxresults_weak): (graph_file, _, _, _, api_type) = single_dataset_nxresults_weak assert_scipy_api_compat(graph_file, api_type) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_scipy_api_compat_strong(single_dataset_nxresults_strong): (graph_file, _, _, _, api_type) = single_dataset_nxresults_strong assert_scipy_api_compat(graph_file, api_type) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("connection_type", ["strong", "weak"]) def test_scipy_api_compat(connection_type): if connection_type == "strong": diff --git a/python/cugraph/tests/test_convert_matrix.py b/python/cugraph/tests/test_convert_matrix.py index d418dd7ce2e..4d6c90364d8 100644 --- a/python/cugraph/tests/test_convert_matrix.py +++ b/python/cugraph/tests/test_convert_matrix.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -15,6 +15,7 @@ import pytest import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import numpy as np # Temporarily suppress warnings till networkX fixes deprecation warnings @@ -36,6 +37,9 @@ def setup_function(): gc.collect() +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_to_from_pandas(graph_file): # Read in the graph @@ -82,6 +86,9 @@ def test_to_from_pandas(graph_file): assert exp_pdf.equals(res_pdf) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_from_to_numpy(graph_file): # Read in the graph @@ -150,6 +157,9 @@ def test_from_to_numpy(graph_file): assert exp_pdf.equals(res_pdf) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_from_edgelist(graph_file): """ @@ -165,6 +175,9 @@ def test_from_edgelist(graph_file): assert G1.EdgeList == G2.EdgeList +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_from_adjlist(graph_file): """ diff --git a/python/cugraph/tests/test_core_number.py b/python/cugraph/tests/test_core_number.py index edbc7b0597b..c2394cdf735 100644 --- a/python/cugraph/tests/test_core_number.py +++ b/python/cugraph/tests/test_core_number.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -15,6 +15,7 @@ import pytest import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than from cugraph.utilities import df_score_to_dictionary # Temporarily suppress warnings till networkX fixes deprecation warnings @@ -78,6 +79,9 @@ def calc_core_number(graph_file): # https://github.com/rapidsai/cugraph/issues/1045 # # @pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_core_number(graph_file): gc.collect() @@ -91,6 +95,9 @@ def test_core_number(graph_file): assert cg_num_dic == nx_num +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_core_number_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_ecg.py b/python/cugraph/tests/test_ecg.py index ba705a787ee..60f97715efa 100644 --- a/python/cugraph/tests/test_ecg.py +++ b/python/cugraph/tests/test_ecg.py @@ -55,6 +55,9 @@ def golden_call(graph_file): ENSEMBLE_SIZES = [16, 32] +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", DATASETS) @pytest.mark.parametrize("min_weight", MIN_WEIGHTS) @pytest.mark.parametrize("ensemble_size", ENSEMBLE_SIZES) diff --git a/python/cugraph/tests/test_edge_betweenness_centrality.py b/python/cugraph/tests/test_edge_betweenness_centrality.py index 529b0b9de9c..224998df48c 100644 --- a/python/cugraph/tests/test_edge_betweenness_centrality.py +++ b/python/cugraph/tests/test_edge_betweenness_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION.: +# Copyright (c) 2019-2021, NVIDIA CORPORATION.: # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import random import numpy as np import cupy @@ -300,6 +301,9 @@ def prepare_test(): gc.collect() +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -332,6 +336,9 @@ def test_edge_betweenness_centrality( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", [None]) @@ -373,6 +380,9 @@ def test_edge_betweenness_centrality_k_full( # the function operating the comparison inside is first proceeding # to a random sampling over the number of vertices (thus direct offsets) # in the graph structure instead of actual vertices identifiers +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -409,6 +419,9 @@ def test_edge_betweenness_centrality_fixed_sample( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -447,6 +460,9 @@ def test_edge_betweenness_centrality_weight_except( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @@ -482,6 +498,9 @@ def test_edge_betweenness_invalid_dtype( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) diff --git a/python/cugraph/tests/test_egonet.py b/python/cugraph/tests/test_egonet.py index b259c2567dc..fb04674a52b 100644 --- a/python/cugraph/tests/test_egonet.py +++ b/python/cugraph/tests/test_egonet.py @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -35,6 +36,9 @@ RADIUS = [1, 2, 3] +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("seed", SEEDS) @pytest.mark.parametrize("radius", RADIUS) @@ -54,6 +58,9 @@ def test_ego_graph_nx(graph_file, seed, radius): 
assert nx.is_isomorphic(ego_nx, ego_cugraph) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("seeds", [[0, 5, 13]]) @pytest.mark.parametrize("radius", [1, 2, 3]) diff --git a/python/cugraph/tests/test_filter_unreachable.py b/python/cugraph/tests/test_filter_unreachable.py index 29b862f0285..f89dbba4e30 100644 --- a/python/cugraph/tests/test_filter_unreachable.py +++ b/python/cugraph/tests/test_filter_unreachable.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,6 +18,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -36,6 +37,9 @@ SOURCES = [1] +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("source", SOURCES) def test_filter_unreachable(graph_file, source): diff --git a/python/cugraph/tests/test_graph.py b/python/cugraph/tests/test_graph.py index d8d5a504070..1a032bdaf17 100644 --- a/python/cugraph/tests/test_graph.py +++ b/python/cugraph/tests/test_graph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -23,6 +23,7 @@ from cudf.tests.utils import assert_eq import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # MG import cugraph.dask as dcg @@ -162,6 +163,9 @@ def test_version(): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_add_edge_list_to_adj_list(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -182,6 +186,9 @@ def test_add_edge_list_to_adj_list(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_add_adj_list_to_edge_list(graph_file): Mnx = utils.read_csv_for_nx(graph_file) @@ -208,6 +215,9 @@ def test_add_adj_list_to_edge_list(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_view_edge_list_from_adj_list(graph_file): Mnx = utils.read_csv_for_nx(graph_file) @@ -229,6 +239,9 @@ def test_view_edge_list_from_adj_list(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_delete_edge_list_delete_adj_list(graph_file): Mnx = utils.read_csv_for_nx(graph_file) @@ -257,6 +270,9 @@ def test_delete_edge_list_delete_adj_list(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_add_edge_or_adj_list_after_add_edge_or_adj_list(graph_file): Mnx = utils.read_csv_for_nx(graph_file) @@ -296,6 +312,9 @@ def test_add_edge_or_adj_list_after_add_edge_or_adj_list(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_edges_for_Graph(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -334,6 +353,9 @@ def test_edges_for_Graph(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_view_edge_list_for_Graph(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -377,6 +399,9 @@ def test_view_edge_list_for_Graph(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize('graph_file', utils.DATASETS) def test_consolidation(graph_file): cluster = LocalCUDACluster() @@ -411,6 +436,9 @@ def test_consolidation(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize('graph_file', utils.DATASETS_SMALL) def test_two_hop_neighbors(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -430,6 +458,9 @@ def test_two_hop_neighbors(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_degree_functionality(graph_file): M = utils.read_csv_for_nx(graph_file) @@ -468,6 +499,9 @@ def test_degree_functionality(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) 
@pytest.mark.parametrize("graph_file", utils.DATASETS) def test_degrees_functionality(graph_file): M = utils.read_csv_for_nx(graph_file) @@ -499,6 +533,9 @@ def test_degrees_functionality(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_number_of_vertices(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -517,6 +554,9 @@ def test_number_of_vertices(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) def test_to_directed(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -544,6 +584,9 @@ def test_to_directed(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) def test_to_undirected(graph_file): # Read data and then convert to directed by dropped some edges @@ -578,6 +621,9 @@ def test_to_undirected(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_has_edge(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -593,6 +639,9 @@ def test_has_edge(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_has_node(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -606,6 +655,9 @@ def test_has_node(graph_file): assert G.has_node(n) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_invalid_has_node(): df = cudf.DataFrame([[1, 2]], columns=["src", "dst"]) G = cugraph.Graph() @@ -615,6 +667,9 @@ def test_invalid_has_node(): assert not G.has_node(G.number_of_nodes() + 1) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize('graph_file', utils.DATASETS) def test_bipartite_api(graph_file): # This test only tests the functionality of adding set of nodes and @@ -648,6 +703,9 @@ def test_bipartite_api(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_neighbors(graph_file): cu_M = utils.read_csv_file(graph_file) diff --git a/python/cugraph/tests/test_hits.py b/python/cugraph/tests/test_hits.py index 6b6f54937a6..58c03bebd88 100644 --- a/python/cugraph/tests/test_hits.py +++ b/python/cugraph/tests/test_hits.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -20,6 +20,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -79,6 +80,9 @@ def networkx_call(M, max_iter, tol): TOLERANCE = [1.0e-06] +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) diff --git a/python/cugraph/tests/test_hypergraph.py b/python/cugraph/tests/test_hypergraph.py index dbce89905cd..9027fdcffd6 100644 --- a/python/cugraph/tests/test_hypergraph.py +++ b/python/cugraph/tests/test_hypergraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -34,12 +34,15 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import cudf -from cudf.tests.utils import assert_eq -import cugraph import datetime as dt + import pandas as pd import pytest +import cudf +from cudf.tests.utils import assert_eq + +import cugraph +from cugraph.utilities.utils import is_device_version_less_than simple_df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -56,6 +59,9 @@ })) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_complex_df(): complex_df = pd.DataFrame({ "src": [0, 1, 2, 3], @@ -101,6 +107,9 @@ def test_complex_df(): cugraph.hypergraph(complex_df) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("categorical_metadata", [False, True]) def test_hyperedges(categorical_metadata): @@ -171,6 +180,9 @@ def test_hyperedges(categorical_metadata): assert_eq(len(h[k]), v) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_hyperedges_direct(): h = cugraph.hypergraph(hyper_df, direct=True) @@ -179,6 +191,9 @@ def test_hyperedges_direct(): assert_eq(len(h["nodes"]), 9) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_hyperedges_direct_categories(): h = cugraph.hypergraph( @@ -195,6 +210,9 @@ def test_hyperedges_direct_categories(): assert_eq(len(h["nodes"]), 6) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_hyperedges_direct_manual_shaping(): h1 = cugraph.hypergraph( @@ -212,6 +230,9 @@ def test_hyperedges_direct_manual_shaping(): assert_eq(len(h2["edges"]), 12) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("categorical_metadata", [False, True]) def test_drop_edge_attrs(categorical_metadata): @@ -263,6 +284,9 @@ def test_drop_edge_attrs(categorical_metadata): assert_eq(len(h[k]), v) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("categorical_metadata", [False, True]) def test_drop_edge_attrs_direct(categorical_metadata): @@ -304,6 +328,9 @@ def 
test_drop_edge_attrs_direct(categorical_metadata): assert_eq(len(h[k]), v) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_skip_hyper(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -318,6 +345,9 @@ def test_skip_hyper(): assert len(hg["graph"].edges()) == 6 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_skip_drop_na_hyper(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -332,6 +362,9 @@ def test_skip_drop_na_hyper(): assert len(hg["graph"].edges()) == 5 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_skip_direct(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -346,6 +379,9 @@ def test_skip_direct(): assert len(hg["graph"].edges()) == 3 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_skip_drop_na_direct(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -360,6 +396,9 @@ def test_skip_drop_na_direct(): assert len(hg["graph"].edges()) == 2 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_drop_na_hyper(): df = cudf.DataFrame.from_pandas( @@ -372,6 +411,9 @@ def test_drop_na_hyper(): assert len(hg["graph"].edges()) == 4 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_drop_na_direct(): df = cudf.DataFrame.from_pandas( @@ -384,6 +426,9 @@ def test_drop_na_direct(): assert len(hg["graph"].edges()) == 1 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_skip_na_hyperedge(): nans_df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -403,6 +448,9 @@ def test_skip_na_hyperedge(): assert_eq(len(default_h_edges), len(expected_hits)) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_hyper_to_pa_vanilla(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -417,6 +465,9 @@ def test_hyper_to_pa_vanilla(): assert len(edges_err) == 6 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_hyper_to_pa_mixed(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -431,6 +482,9 @@ def test_hyper_to_pa_mixed(): assert len(edges_err) == 6 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_hyper_to_pa_na(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -448,6 +502,9 @@ def test_hyper_to_pa_na(): assert len(edges_err) == 6 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_hyper_to_pa_all(): hg = cugraph.hypergraph(simple_df, ["id", "a1", "🙈"]) nodes_arr = hg["graph"].nodes().to_arrow() @@ -458,6 +515,9 @@ def test_hyper_to_pa_all(): assert len(edges_err) == 9 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_hyper_to_pa_all_direct(): hg = cugraph.hypergraph(simple_df, ["id", "a1", "🙈"], direct=True) nodes_arr = hg["graph"].nodes().to_arrow() diff --git a/python/cugraph/tests/test_jaccard.py b/python/cugraph/tests/test_jaccard.py index b61101ef1d0..87e9a5a3cc7 100644 --- a/python/cugraph/tests/test_jaccard.py +++ b/python/cugraph/tests/test_jaccard.py @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than from pathlib import PurePath # Temporarily suppress warnings 
till networkX fixes deprecation warnings @@ -91,6 +92,9 @@ def networkx_call(M): return src, dst, coeff +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_jaccard(graph_file): gc.collect() @@ -113,6 +117,9 @@ def test_jaccard(graph_file): assert err == 0 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", [PurePath( utils.RAPIDS_DATASET_ROOT_DIR)/"netscience.csv"] ) @@ -137,6 +144,9 @@ def test_jaccard_edgevals(graph_file): assert err == 0 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_jaccard_two_hop(graph_file): gc.collect() @@ -169,6 +179,9 @@ def test_jaccard_two_hop(graph_file): assert diff < 1.0e-6 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_jaccard_two_hop_edge_vals(graph_file): gc.collect() @@ -203,6 +216,9 @@ def test_jaccard_two_hop_edge_vals(graph_file): assert diff < 1.0e-6 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_jaccard_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_k_core.py b/python/cugraph/tests/test_k_core.py index 5e3220dcfb1..4d3e4903d33 100644 --- a/python/cugraph/tests/test_k_core.py +++ b/python/cugraph/tests/test_k_core.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -65,6 +66,9 @@ def compare_edges(cg, nxg): return True +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_core_number_Graph(graph_file): gc.collect() @@ -74,6 +78,9 @@ def test_core_number_Graph(graph_file): assert compare_edges(cu_kcore, nx_kcore) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_core_number_Graph_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_k_truss_subgraph.py b/python/cugraph/tests/test_k_truss_subgraph.py index a86490fb561..02b95f01a01 100644 --- a/python/cugraph/tests/test_k_truss_subgraph.py +++ b/python/cugraph/tests/test_k_truss_subgraph.py @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import numpy as np @@ -73,6 +74,9 @@ def compare_k_truss(k_truss_cugraph, k, ground_truth_file): return True +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file, nx_ground_truth", utils.DATASETS_KTRUSS) def test_ktruss_subgraph_Graph(graph_file, nx_ground_truth): gc.collect() @@ -86,6 +90,9 @@ def test_ktruss_subgraph_Graph(graph_file, nx_ground_truth): compare_k_truss(k_subgraph, k, nx_ground_truth) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file, nx_ground_truth", utils.DATASETS_KTRUSS) def test_ktruss_subgraph_Graph_nx(graph_file, nx_ground_truth): gc.collect() diff --git a/python/cugraph/tests/test_katz_centrality.py b/python/cugraph/tests/test_katz_centrality.py index a2a03c1518b..864b2974117 100644 --- a/python/cugraph/tests/test_katz_centrality.py +++ b/python/cugraph/tests/test_katz_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -70,6 +71,9 @@ def calc_katz(graph_file): # https://github.com/rapidsai/cugraph/issues/1042 # # @pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_katz_centrality(graph_file): gc.collect() @@ -82,6 +86,9 @@ def test_katz_centrality(graph_file): assert topKNX.equals(topKCU) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_katz_centrality_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_maximum_spanning_tree.py b/python/cugraph/tests/test_maximum_spanning_tree.py index e20e2f72267..0e55c7f15d7 100644 --- a/python/cugraph/tests/test_maximum_spanning_tree.py +++ b/python/cugraph/tests/test_maximum_spanning_tree.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import rmm import cudf import time @@ -36,6 +37,9 @@ print("Networkx version : {} ".format(nx.__version__)) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS) def test_maximum_spanning_tree_nx(graph_file): gc.collect() @@ -71,6 +75,9 @@ def test_maximum_spanning_tree_nx(graph_file): ] +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.skip(reason="Skipping large tests") @pytest.mark.parametrize("graph_size", DATASETS_SIZES) def test_random_maximum_spanning_tree_nx(graph_size): diff --git a/python/cugraph/tests/test_minimum_spanning_tree.py b/python/cugraph/tests/test_minimum_spanning_tree.py index 55ebdcfda08..15404bc8acf 100644 --- a/python/cugraph/tests/test_minimum_spanning_tree.py +++ b/python/cugraph/tests/test_minimum_spanning_tree.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -17,6 +17,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import rmm import cudf import time @@ -36,6 +37,9 @@ print("Networkx version : {} ".format(nx.__version__)) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS) def test_minimum_spanning_tree_nx(graph_file): gc.collect() @@ -71,6 +75,9 @@ def test_minimum_spanning_tree_nx(graph_file): ] +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.skip(reason="Skipping large tests") @pytest.mark.parametrize("graph_size", DATASETS_SIZES) def test_random_minimum_spanning_tree_nx(graph_size): diff --git a/python/cugraph/tests/test_modularity.py b/python/cugraph/tests/test_modularity.py index 7a7d42d1592..2956d8f1913 100644 --- a/python/cugraph/tests/test_modularity.py +++ b/python/cugraph/tests/test_modularity.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,6 +19,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than def cugraph_call(G, partitions): @@ -52,6 +53,9 @@ def random_call(G, partitions): # Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_modularity_clustering(graph_file, partitions): diff --git a/python/cugraph/tests/test_multigraph.py b/python/cugraph/tests/test_multigraph.py index cb659bc7e24..62245bcf65d 100644 --- a/python/cugraph/tests/test_multigraph.py +++ b/python/cugraph/tests/test_multigraph.py @@ -1,6 +1,20 @@ +# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import cugraph import networkx as nx from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import pytest import gc import numpy as np @@ -13,6 +27,9 @@ def setup_function(): gc.collect() +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_multigraph(graph_file): # FIXME: Migrate to new test fixtures for Graph setup once available @@ -46,6 +63,9 @@ def test_multigraph(graph_file): assert nxedges.equals(cuedges[["source", "target", "weight"]]) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_Graph_from_MultiGraph(graph_file): # FIXME: Migrate to new test fixtures for Graph setup once available @@ -79,6 +99,9 @@ def test_Graph_from_MultiGraph(graph_file): assert Gnxd.number_of_edges() == Gd.number_of_edges() +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_multigraph_sssp(graph_file): # FIXME: Migrate to new test fixtures for Graph setup once available diff --git a/python/cugraph/tests/test_nx_convert.py b/python/cugraph/tests/test_nx_convert.py index 08a96a801e2..5799b88157e 100644 --- a/python/cugraph/tests/test_nx_convert.py +++ b/python/cugraph/tests/test_nx_convert.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,6 +16,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -48,6 +49,9 @@ def _compare_graphs(nxG, cuG, has_wt=True): assert cu_df.to_dict() == nx_df.to_dict() +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_networkx_compatibility(graph_file): # test to make sure cuGraph and Nx build similar Graphs @@ -77,7 +81,9 @@ def test_networkx_compatibility(graph_file): _compare_graphs(nxG, cuG) -# Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_nx_convert(graph_file): gc.collect() @@ -91,6 +97,9 @@ def test_nx_convert(graph_file): _compare_graphs(nxG, cuG, has_wt=False) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_nx_convert_multicol(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_overlap.py b/python/cugraph/tests/test_overlap.py index 53d279478f7..96e510c0294 100644 --- a/python/cugraph/tests/test_overlap.py +++ b/python/cugraph/tests/test_overlap.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -19,6 +19,7 @@ import scipy import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than def cugraph_call(cu_M, pairs, edgevals=False): @@ -82,6 +83,9 @@ def cpu_call(M, first, second): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_overlap(graph_file): gc.collect() @@ -116,6 +120,9 @@ def test_overlap(graph_file): # Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_overlap_edge_vals(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_pagerank.py b/python/cugraph/tests/test_pagerank.py index 3ce8dd4ffe9..48ab1b39caa 100644 --- a/python/cugraph/tests/test_pagerank.py +++ b/python/cugraph/tests/test_pagerank.py @@ -20,6 +20,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -144,6 +145,9 @@ def networkx_call(Gnx, max_iter, tol, alpha, personalization_perc, nnz_vtx): # https://github.com/rapidsai/cugraph/issues/533 # +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) @@ -194,6 +198,9 @@ def test_pagerank( assert err < (0.01 * len(cugraph_pr)) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) diff --git a/python/cugraph/tests/test_paths.py b/python/cugraph/tests/test_paths.py index 7467d024051..f58195570b8 100644 --- a/python/cugraph/tests/test_paths.py +++ b/python/cugraph/tests/test_paths.py @@ -1,11 +1,28 @@ +# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +from tempfile import NamedTemporaryFile + import cudf -import cugraph from cupy.sparse import coo_matrix as cupy_coo_matrix import cupy import networkx as nx import pytest -import sys -from tempfile import NamedTemporaryFile + +import cugraph +from cugraph.utilities.utils import is_device_version_less_than + CONNECTED_GRAPH = """1,5,3 1,4,1 @@ -58,6 +75,9 @@ def graphs(request): yield cugraph_G, nx_G, cupy_df +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graphs", [CONNECTED_GRAPH], indirect=True) def test_connected_graph_shortest_path_length(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -91,6 +111,9 @@ def test_connected_graph_shortest_path_length(graphs): assert path_1_to_6_length == cugraph.shortest_path_length(cupy_df, 1, 6) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graphs", [CONNECTED_GRAPH], indirect=True) def test_shortest_path_length_invalid_source(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -105,6 +128,9 @@ def test_shortest_path_length_invalid_source(graphs): cugraph.shortest_path_length(cupy_df, -1, 1) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graphs", [DISCONNECTED_GRAPH], indirect=True) def test_shortest_path_length_invalid_target(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -119,6 +145,9 @@ def test_shortest_path_length_invalid_target(graphs): cugraph.shortest_path_length(cupy_df, 1, 10) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graphs", [CONNECTED_GRAPH], indirect=True) def test_shortest_path_length_invalid_vertexes(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -133,6 +162,9 @@ def test_shortest_path_length_invalid_vertexes(graphs): cugraph.shortest_path_length(cupy_df, 0, 42) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graphs", [DISCONNECTED_GRAPH], indirect=True) def test_shortest_path_length_no_path(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -143,6 +175,9 @@ def test_shortest_path_length_no_path(graphs): assert path_1_to_8 == cugraph.shortest_path_length(cupy_df, 1, 8) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graphs", [DISCONNECTED_GRAPH], indirect=True) def test_shortest_path_length_no_target(graphs): cugraph_G, nx_G, cupy_df = graphs diff --git a/python/cugraph/tests/test_renumber.py b/python/cugraph/tests/test_renumber.py index 5362d3f5804..57912150b12 100644 --- a/python/cugraph/tests/test_renumber.py +++ b/python/cugraph/tests/test_renumber.py @@ -17,12 +17,16 @@ import pandas as pd import pytest - import cudf + from cugraph.structure.number_map import NumberMap from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_renumber_ips(): source_list = [ "192.168.1.1", @@ -57,6 +61,9 @@ def test_renumber_ips(): assert check_dst.equals(gdf["dest_as_int"]) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_renumber_ips_cols(): source_list = [ @@ -125,6 +132,9 @@ def test_renumber_ips_str_cols(): assert check_dst.equals(gdf["dest_list"]) +@pytest.mark.skipif( + 
is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_renumber_negative(): source_list = [4, 6, 8, -20, 1] dest_list = [1, 29, 35, 0, 77] @@ -146,6 +156,9 @@ def test_renumber_negative(): assert check_dst.equals(gdf["dest_list"]) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_renumber_negative_col(): source_list = [4, 6, 8, -20, 1] dest_list = [1, 29, 35, 0, 77] @@ -167,7 +180,9 @@ def test_renumber_negative_col(): assert check_dst.equals(gdf["dest_list"]) -# Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.skip(reason="dropped renumbering from series support") @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_series(graph_file): @@ -205,6 +220,9 @@ def test_renumber_series(graph_file): assert check_dst["0_y"].equals(check_dst["0_x"]) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_files(graph_file): gc.collect() @@ -238,7 +256,9 @@ def test_renumber_files(graph_file): assert exp_dst.equals(unrenumbered_df["dst"]) -# Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_files_col(graph_file): gc.collect() @@ -271,7 +291,9 @@ def test_renumber_files_col(graph_file): assert exp_dst.equals(unrenumbered_df["dst"]) -# Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_files_multi_col(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_sssp.py b/python/cugraph/tests/test_sssp.py index 0a5347a6290..9e866c84f07 100644 --- a/python/cugraph/tests/test_sssp.py +++ b/python/cugraph/tests/test_sssp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -28,6 +28,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -203,6 +204,9 @@ def single_dataset_source_nxresults_weighted(request): # ============================================================================= # Tests # ============================================================================= +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_DIR_INPUT_TYPES) def test_sssp(gpubenchmark, dataset_source_nxresults, cugraph_input_type): # Extract the params generated from the fixture @@ -232,6 +236,9 @@ def test_sssp(gpubenchmark, dataset_source_nxresults, cugraph_input_type): assert err == 0 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.NX_DIR_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_sssp_nonnative_inputs(gpubenchmark, @@ -242,6 +249,9 @@ def test_sssp_nonnative_inputs(gpubenchmark, cugraph_input_type) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_DIR_INPUT_TYPES) def test_sssp_edgevals(gpubenchmark, dataset_source_nxresults_weighted, cugraph_input_type): @@ -276,6 +286,9 @@ def test_sssp_edgevals(gpubenchmark, dataset_source_nxresults_weighted, assert err == 0 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("cugraph_input_type", utils.NX_DIR_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_sssp_edgevals_nonnative_inputs( @@ -287,6 +300,9 @@ def test_sssp_edgevals_nonnative_inputs( cugraph_input_type) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("source", SOURCES) def test_sssp_data_type_conversion(graph_file, source): @@ -341,6 +357,9 @@ def test_sssp_data_type_conversion(graph_file, source): assert err == 0 +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_scipy_api_compat(): graph_file = utils.DATASETS[0] diff --git a/python/cugraph/tests/test_subgraph_extraction.py b/python/cugraph/tests/test_subgraph_extraction.py index 9e9eccc4347..5be80f341b5 100644 --- a/python/cugraph/tests/test_subgraph_extraction.py +++ b/python/cugraph/tests/test_subgraph_extraction.py @@ -19,6 +19,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -70,7 +71,9 @@ def nx_call(M, verts, directed=True): return nx.subgraph(G, verts) -# Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_DiGraph(graph_file): gc.collect() @@ -85,9 +88,9 @@ def test_subgraph_extraction_DiGraph(graph_file): assert compare_edges(cu_sg, nx_sg) -# Test all combinations of 
default/managed and pooled/non-pooled allocation - - +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_Graph(graph_file): gc.collect() @@ -102,6 +105,9 @@ def test_subgraph_extraction_Graph(graph_file): assert compare_edges(cu_sg, nx_sg) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_Graph_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_triangle_count.py b/python/cugraph/tests/test_triangle_count.py index ff28f55838d..d768183e4ad 100644 --- a/python/cugraph/tests/test_triangle_count.py +++ b/python/cugraph/tests/test_triangle_count.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,6 +18,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -66,6 +67,9 @@ def networkx_call(M): # https://github.com/rapidsai/cugraph/issues/1043 # # @pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_triangles(graph_file): gc.collect() @@ -76,6 +80,9 @@ def test_triangles(graph_file): assert cu_count == nx_count +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_triangles_edge_vals(graph_file): gc.collect() @@ -86,6 +93,9 @@ def test_triangles_edge_vals(graph_file): assert cu_count == nx_count +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_triangles_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_utils.py b/python/cugraph/tests/test_utils.py index 55410817f90..b350ef27efd 100644 --- a/python/cugraph/tests/test_utils.py +++ b/python/cugraph/tests/test_utils.py @@ -15,9 +15,13 @@ import pytest import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than from pathlib import PurePath +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_bfs_paths(): with pytest.raises(ValueError) as ErrorMsg: gc.collect() @@ -43,6 +47,9 @@ def test_bfs_paths(): assert "not in the result set" in str(ErrorMsg) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) def test_bfs_paths_array(): with pytest.raises(ValueError) as ErrorMsg: gc.collect() diff --git a/python/cugraph/tests/test_wjaccard.py b/python/cugraph/tests/test_wjaccard.py index c5cab18484c..f31d65de652 100644 --- a/python/cugraph/tests/test_wjaccard.py +++ b/python/cugraph/tests/test_wjaccard.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -20,6 +20,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -86,6 +87,9 @@ def networkx_call(M): return coeff +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_wjaccard(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_woverlap.py b/python/cugraph/tests/test_woverlap.py index e7da21014ba..50d7b0ecf84 100644 --- a/python/cugraph/tests/test_woverlap.py +++ b/python/cugraph/tests/test_woverlap.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,6 +19,7 @@ import cudf import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import numpy as np @@ -83,7 +84,9 @@ def cpu_call(M, first, second): return result -# Test +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_woverlap(graph_file): gc.collect() From d1ec7ac873da74f34fae7a82513d107077cc945c Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Fri, 2 Apr 2021 22:41:35 +0200 Subject: [PATCH 40/51] Clean up nullptr cuda_stream_view arguments (#1504) Cugraph fix for: https://github.com/rapidsai/cugraph/issues/1499 Authors: - Hugo Linsenmaier (https://github.com/hlinsen) Approvers: - Brad Rees (https://github.com/BradReesWork) - Alex Fender (https://github.com/afender) URL: https://github.com/rapidsai/cugraph/pull/1504 --- cpp/CMakeLists.txt | 2 +- cpp/tests/traversal/tsp_test.cu | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 57f324a60a9..3b1e93d0781 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -275,7 +275,7 @@ message("set LIBCUDACXX_INCLUDE_DIR to: ${LIBCUDACXX_INCLUDE_DIR}") FetchContent_Declare( cuhornet GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git - GIT_TAG 9cb8e8803852bd895a9c95c0fe778ad6eeefa7ad + GIT_TAG e58d0ecdbc270fc28867d66c965787a62a7a882c GIT_SHALLOW true SOURCE_SUBDIR hornet ) diff --git a/cpp/tests/traversal/tsp_test.cu b/cpp/tests/traversal/tsp_test.cu index 9ebf464ae3e..d4e9ff90f35 100644 --- a/cpp/tests/traversal/tsp_test.cu +++ b/cpp/tests/traversal/tsp_test.cu @@ -133,10 +133,11 @@ class Tests_Tsp : public ::testing::TestWithParam { // Device alloc raft::handle_t const handle; - rmm::device_uvector vertices(static_cast(nodes), nullptr); - rmm::device_uvector route(static_cast(nodes), nullptr); - rmm::device_uvector x_pos(static_cast(nodes), nullptr); - rmm::device_uvector y_pos(static_cast(nodes), nullptr); + auto stream = handle.get_stream(); + rmm::device_uvector vertices(static_cast(nodes), stream); + rmm::device_uvector route(static_cast(nodes), stream); + rmm::device_uvector x_pos(static_cast(nodes), stream); + rmm::device_uvector y_pos(static_cast(nodes), stream); int* vtx_ptr = 
vertices.data(); int* d_route = route.data(); From 8222f315dfdc25d89bc958235dc60d56f291751d Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Fri, 2 Apr 2021 18:38:55 -0500 Subject: [PATCH 41/51] pascal renumbering fix (#1505) Authors: - https://github.com/Iroy30 - Rick Ratzel (https://github.com/rlratzel) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Alex Fender (https://github.com/afender) URL: https://github.com/rapidsai/cugraph/pull/1505 --- benchmarks/bench_algos.py | 32 ---------- notebooks/centrality/Betweenness.ipynb | 1 - notebooks/centrality/Katz.ipynb | 1 - notebooks/community/Spectral-Clustering.ipynb | 1 - notebooks/community/Triangle-Counting.ipynb | 1 - .../components/ConnectedComponents.ipynb | 1 - notebooks/cores/core-number.ipynb | 1 - notebooks/cores/kcore.ipynb | 1 - notebooks/cores/ktruss.ipynb | 1 - notebooks/link_analysis/HITS.ipynb | 1 - .../link_prediction/Jaccard-Similarity.ipynb | 1 - .../link_prediction/Overlap-Similarity.ipynb | 1 - notebooks/structure/Renumber-2.ipynb | 1 - notebooks/structure/Renumber.ipynb | 1 - notebooks/structure/Symmetrize.ipynb | 1 - notebooks/traversal/SSSP.ipynb | 1 - python/cugraph/structure/number_map.py | 10 ++++ python/cugraph/tests/test_balanced_cut.py | 10 ---- .../tests/test_betweenness_centrality.py | 19 ------ python/cugraph/tests/test_bfs.py | 13 ----- python/cugraph/tests/test_connectivity.py | 22 ------- python/cugraph/tests/test_convert_matrix.py | 13 ----- python/cugraph/tests/test_core_number.py | 7 --- python/cugraph/tests/test_ecg.py | 3 - .../tests/test_edge_betweenness_centrality.py | 19 ------ python/cugraph/tests/test_egonet.py | 7 --- .../cugraph/tests/test_filter_unreachable.py | 4 -- python/cugraph/tests/test_graph.py | 58 ------------------- python/cugraph/tests/test_hits.py | 4 -- python/cugraph/tests/test_hypergraph.py | 58 ------------------- python/cugraph/tests/test_jaccard.py | 16 ----- python/cugraph/tests/test_k_core.py | 7 --- python/cugraph/tests/test_k_truss_subgraph.py | 7 --- python/cugraph/tests/test_katz_centrality.py | 7 --- .../tests/test_maximum_spanning_tree.py | 16 ++--- .../tests/test_minimum_spanning_tree.py | 16 ++--- python/cugraph/tests/test_modularity.py | 4 -- python/cugraph/tests/test_multigraph.py | 20 ++----- python/cugraph/tests/test_nx_convert.py | 13 +---- python/cugraph/tests/test_overlap.py | 8 +-- python/cugraph/tests/test_pagerank.py | 8 +-- python/cugraph/tests/test_paths.py | 19 ------ python/cugraph/tests/test_renumber.py | 25 -------- python/cugraph/tests/test_sssp.py | 20 +------ .../cugraph/tests/test_subgraph_extraction.py | 11 +--- python/cugraph/tests/test_triangle_count.py | 11 +--- python/cugraph/tests/test_utils.py | 11 +--- python/cugraph/tests/test_wjaccard.py | 5 +- python/cugraph/tests/test_woverlap.py | 7 +-- 49 files changed, 40 insertions(+), 485 deletions(-) diff --git a/benchmarks/bench_algos.py b/benchmarks/bench_algos.py index 14c15ebc08c..f9f8bf9cf53 100644 --- a/benchmarks/bench_algos.py +++ b/benchmarks/bench_algos.py @@ -162,8 +162,6 @@ def anyGraphWithTransposedAdjListComputed(request): ############################################################################### # Benchmarks @pytest.mark.ETL -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_create_graph(gpubenchmark, edgelistCreated): gpubenchmark(cugraph.from_cudf_edgelist, edgelistCreated, @@ -181,8 +179,6 @@ def bench_create_graph(gpubenchmark, edgelistCreated): warmup_iterations=10, 
max_time=0.005 ) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_create_digraph(gpubenchmark, edgelistCreated): gpubenchmark(cugraph.from_cudf_edgelist, edgelistCreated, @@ -192,39 +188,27 @@ def bench_create_digraph(gpubenchmark, edgelistCreated): @pytest.mark.ETL -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_renumber(gpubenchmark, edgelistCreated): gpubenchmark(NumberMap.renumber, edgelistCreated, "0", "1") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_pagerank(gpubenchmark, anyGraphWithTransposedAdjListComputed): gpubenchmark(cugraph.pagerank, anyGraphWithTransposedAdjListComputed) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_bfs(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.bfs, anyGraphWithAdjListComputed, 0) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_force_atlas2(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.force_atlas2, anyGraphWithAdjListComputed, max_iter=50) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_sssp(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.sssp, anyGraphWithAdjListComputed, 0) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_jaccard(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.jaccard, graphWithAdjListComputed) @@ -235,28 +219,20 @@ def bench_louvain(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.louvain, graphWithAdjListComputed) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_weakly_connected_components(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.weakly_connected_components, anyGraphWithAdjListComputed) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_overlap(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.overlap, anyGraphWithAdjListComputed) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_triangles(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.triangles, graphWithAdjListComputed) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_spectralBalancedCutClustering(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.spectralBalancedCutClustering, @@ -271,27 +247,19 @@ def bench_spectralModularityMaximizationClustering( anyGraphWithAdjListComputed, 2) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_graph_degree(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(anyGraphWithAdjListComputed.degree) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_graph_degrees(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(anyGraphWithAdjListComputed.degrees) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_betweenness_centrality(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.betweenness_centrality, anyGraphWithAdjListComputed, k=10, seed=123) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), 
reason="Not supported on Pascal") def bench_edge_betweenness_centrality(gpubenchmark, anyGraphWithAdjListComputed): gpubenchmark(cugraph.edge_betweenness_centrality, diff --git a/notebooks/centrality/Betweenness.ipynb b/notebooks/centrality/Betweenness.ipynb index d748defe74c..e4e33ef91e5 100644 --- a/notebooks/centrality/Betweenness.ipynb +++ b/notebooks/centrality/Betweenness.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Betweenness Centrality\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will compute the Betweenness centrality for both vertices and edges in our test datase using cuGraph and NetworkX. The NetworkX and cuGraph processes will be interleaved so that each step can be compared.\n", "\n", diff --git a/notebooks/centrality/Katz.ipynb b/notebooks/centrality/Katz.ipynb index cdf8828b80a..2330fc08de8 100755 --- a/notebooks/centrality/Katz.ipynb +++ b/notebooks/centrality/Katz.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Katz Centrality\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will compute the Katz centrality of each vertex in our test datase using both cuGraph and NetworkX. Additionally, NetworkX also contains a Numpy implementation that will used. The NetworkX and cuGraph processes will be interleaved so that each step can be compared.\n", "\n", diff --git a/notebooks/community/Spectral-Clustering.ipynb b/notebooks/community/Spectral-Clustering.ipynb index a3aa538b062..fcefae5eb60 100755 --- a/notebooks/community/Spectral-Clustering.ipynb +++ b/notebooks/community/Spectral-Clustering.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Spectral Clustering \n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will use cuGraph to identify the cluster in a test graph using Spectral Clustering with both the (A) Balance Cut metric, and (B) the Modularity Maximization metric\n", "\n", diff --git a/notebooks/community/Triangle-Counting.ipynb b/notebooks/community/Triangle-Counting.ipynb index 796f6e59fe6..19d3f838fc6 100755 --- a/notebooks/community/Triangle-Counting.ipynb +++ b/notebooks/community/Triangle-Counting.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Triangle Counting\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will count the numner of trianges in our test dataset. 
The NetworkX and cuGraph processes will be interleaved so that each step can be compared.\n", "\n", diff --git a/notebooks/components/ConnectedComponents.ipynb b/notebooks/components/ConnectedComponents.ipynb index e9669d75b38..a9c82e6669f 100755 --- a/notebooks/components/ConnectedComponents.ipynb +++ b/notebooks/components/ConnectedComponents.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Connected Components\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will use cuGraph to compute weakly and strongly connected components of a graph and display some useful information about the resulting components.\n", "\n", diff --git a/notebooks/cores/core-number.ipynb b/notebooks/cores/core-number.ipynb index 127898fb094..6190f653020 100755 --- a/notebooks/cores/core-number.ipynb +++ b/notebooks/cores/core-number.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Core Number\n", - "# Does not run on Pascal\n", "\n", "\n", "In this notebook, we will use cuGraph to compute the core number of every vertex in our test graph \n", diff --git a/notebooks/cores/kcore.ipynb b/notebooks/cores/kcore.ipynb index 250a1ea2aa5..342f4ecd5f7 100755 --- a/notebooks/cores/kcore.ipynb +++ b/notebooks/cores/kcore.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# K-Cores\n", - "# Does not run on Pascal\n", "\n", "\n", "In this notebook, we will use cuGraph to identify the K-Cores clusters in a test graph \n", diff --git a/notebooks/cores/ktruss.ipynb b/notebooks/cores/ktruss.ipynb index 2fe93247d67..e6470110666 100644 --- a/notebooks/cores/ktruss.ipynb +++ b/notebooks/cores/ktruss.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# K-Truss\n", - "# Does not run on Pascal\n", "\n", "\n", "In this notebook, we will use cuGraph to identify the K-Truss clusters in a test graph \n", diff --git a/notebooks/link_analysis/HITS.ipynb b/notebooks/link_analysis/HITS.ipynb index 891133a277c..01fd22929d5 100755 --- a/notebooks/link_analysis/HITS.ipynb +++ b/notebooks/link_analysis/HITS.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# HITS\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will use both NetworkX and cuGraph to compute HITS. \n", "The NetworkX and cuGraph processes will be interleaved so that each step can be compared.\n", diff --git a/notebooks/link_prediction/Jaccard-Similarity.ipynb b/notebooks/link_prediction/Jaccard-Similarity.ipynb index 9a53e559323..21835da1cce 100755 --- a/notebooks/link_prediction/Jaccard-Similarity.ipynb +++ b/notebooks/link_prediction/Jaccard-Similarity.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Jaccard Similarity\n", - "# Does not run on Pascal\n", "----\n", "\n", "In this notebook we will explore the Jaccard vertex similarity metrics available in cuGraph. cuGraph supports:\n", diff --git a/notebooks/link_prediction/Overlap-Similarity.ipynb b/notebooks/link_prediction/Overlap-Similarity.ipynb index ec02a8ebbea..b8733ce4d80 100755 --- a/notebooks/link_prediction/Overlap-Similarity.ipynb +++ b/notebooks/link_prediction/Overlap-Similarity.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Overlap Similarity\n", - "# Does not run on Pascal\n", "----\n", "\n", "In this notebook we will explore the Overlap Coefficient and compare it again Jaccard. 
Similarity can be between neighboring vertices (default) or second hop neighbors\n", diff --git a/notebooks/structure/Renumber-2.ipynb b/notebooks/structure/Renumber-2.ipynb index aa923ba003f..d17c2b32191 100755 --- a/notebooks/structure/Renumber-2.ipynb +++ b/notebooks/structure/Renumber-2.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Renumber\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will use the _renumber_ function to compute new vertex IDs.\n", "\n", diff --git a/notebooks/structure/Renumber.ipynb b/notebooks/structure/Renumber.ipynb index 2a2dab0a1a7..047b53d62df 100755 --- a/notebooks/structure/Renumber.ipynb +++ b/notebooks/structure/Renumber.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Renumbering Test\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will use the _renumber_ function to compute new vertex IDs.\n", "\n", diff --git a/notebooks/structure/Symmetrize.ipynb b/notebooks/structure/Symmetrize.ipynb index 5ba692b4696..3cb84317742 100755 --- a/notebooks/structure/Symmetrize.ipynb +++ b/notebooks/structure/Symmetrize.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Symmetrize\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will use the _symmetrize_ function to create bi-directional edges in an undirected graph\n", "\n", diff --git a/notebooks/traversal/SSSP.ipynb b/notebooks/traversal/SSSP.ipynb index abea30eba15..d2baeb12e74 100755 --- a/notebooks/traversal/SSSP.ipynb +++ b/notebooks/traversal/SSSP.ipynb @@ -5,7 +5,6 @@ "metadata": {}, "source": [ "# Single Source Shortest Path (SSSP)\n", - "# Does not run on Pascal\n", "\n", "In this notebook, we will use cuGraph to compute the shortest path from a starting vertex to everyother vertex in our training dataset.\n", "\n", diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index 5f801eb0d90..e45a50d6dbe 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -16,6 +16,7 @@ from dask.distributed import wait, default_client from cugraph.dask.common.input_utils import get_distributed_data from cugraph.structure import renumber_wrapper as c_renumber +from cugraph.utilities.utils import is_device_version_less_than import cugraph.comms.comms as Comms import dask_cudf import numpy as np @@ -476,6 +477,8 @@ def renumber(df, src_col_names, dst_col_names, preserve_order=False, elif not (df[src_col_names].dtype == np.int32 or df[src_col_names].dtype == np.int64): renumber_type = 'legacy' + elif is_device_version_less_than((7, 0)): + renumber_type = 'legacy' else: renumber_type = 'experimental' df = df.rename(columns={src_col_names: "src", @@ -562,6 +565,12 @@ def get_renumbered_df(data): return renumbered_df, renumber_map else: + if is_device_version_less_than((7, 0)): + renumbered_df = df + renumber_map.implementation.df = indirection_map + renumber_map.implementation.numbered = True + return renumbered_df, renumber_map + renumbering_map, renumbered_df = c_renumber.renumber( df, num_edges, @@ -578,6 +587,7 @@ def get_renumbered_df(data): else: renumber_map.implementation.df = renumbering_map.rename( columns={'original_ids': '0', 'new_ids': 'id'}, copy=False) + renumber_map.implementation.numbered = True return renumbered_df, renumber_map diff --git a/python/cugraph/tests/test_balanced_cut.py b/python/cugraph/tests/test_balanced_cut.py index 4a609e1ef13..2492017511a 100644 --- a/python/cugraph/tests/test_balanced_cut.py +++ b/python/cugraph/tests/test_balanced_cut.py @@ -20,7 +20,6 @@ 
import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than def cugraph_call(G, partitions): @@ -60,9 +59,6 @@ def random_call(G, partitions): # Test all combinations of default/managed and pooled/non-pooled allocation -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering(graph_file, partitions): @@ -85,9 +81,6 @@ def test_edge_cut_clustering(graph_file, partitions): assert cu_score < rand_score -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering_with_edgevals(graph_file, partitions): @@ -130,9 +123,6 @@ def test_digraph_rejected(): cugraph_call(G, 2) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering_with_edgevals_nx(graph_file, partitions): diff --git a/python/cugraph/tests/test_betweenness_centrality.py b/python/cugraph/tests/test_betweenness_centrality.py index 3177b78de47..29c012e95a2 100755 --- a/python/cugraph/tests/test_betweenness_centrality.py +++ b/python/cugraph/tests/test_betweenness_centrality.py @@ -17,7 +17,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than import random import numpy as np import cudf @@ -290,9 +289,6 @@ def prepare_test(): # ============================================================================= # Tests # ============================================================================= -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -328,9 +324,6 @@ def test_betweenness_centrality( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", [None]) @@ -375,9 +368,6 @@ def test_betweenness_centrality_k_full( # the function operating the comparison inside is first proceeding # to a random sampling over the number of vertices (thus direct offsets) # in the graph structure instead of actual vertices identifiers -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -416,9 +406,6 @@ def test_betweenness_centrality_fixed_sample( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -459,9 
+446,6 @@ def test_betweenness_centrality_weight_except( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @@ -500,9 +484,6 @@ def test_betweenness_invalid_dtype( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) diff --git a/python/cugraph/tests/test_bfs.py b/python/cugraph/tests/test_bfs.py index 00996fd3bb3..d04ef957104 100644 --- a/python/cugraph/tests/test_bfs.py +++ b/python/cugraph/tests/test_bfs.py @@ -19,7 +19,6 @@ import pytest import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than import random # Temporarily suppress warnings till networkX fixes deprecation warnings @@ -441,9 +440,6 @@ def dataset_nxresults_allstartvertices_spc(small_dataset_nx_graph): # ============================================================================= # Tests # ============================================================================= -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_INPUT_TYPES) def test_bfs(gpubenchmark, dataset_nxresults_startvertex_spc, cugraph_input_type): @@ -471,9 +467,6 @@ def test_bfs(gpubenchmark, dataset_nxresults_startvertex_spc, ) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.NX_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_bfs_nonnative_inputs(gpubenchmark, @@ -484,9 +477,6 @@ def test_bfs_nonnative_inputs(gpubenchmark, cugraph_input_type) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_INPUT_TYPES) def test_bfs_spc_full(gpubenchmark, dataset_nxresults_allstartvertices_spc, cugraph_input_type): @@ -517,9 +507,6 @@ def test_bfs_spc_full(gpubenchmark, dataset_nxresults_allstartvertices_spc, ) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_scipy_api_compat(): graph_file = utils.DATASETS[0] diff --git a/python/cugraph/tests/test_connectivity.py b/python/cugraph/tests/test_connectivity.py index 14572ab748d..194147ab620 100644 --- a/python/cugraph/tests/test_connectivity.py +++ b/python/cugraph/tests/test_connectivity.py @@ -28,7 +28,6 @@ import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -291,9 +290,6 @@ def single_dataset_nxresults_strong(request): # ============================================================================= # Tests # ============================================================================= -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) 
@pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_DIR_INPUT_TYPES) def test_weak_cc(gpubenchmark, dataset_nxresults_weak, cugraph_input_type): (graph_file, netx_labels, @@ -333,9 +329,6 @@ def test_weak_cc(gpubenchmark, dataset_nxresults_weak, cugraph_input_type): assert nx_vertices == cg_vertices -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.NX_DIR_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_weak_cc_nonnative_inputs(gpubenchmark, @@ -346,9 +339,6 @@ def test_weak_cc_nonnative_inputs(gpubenchmark, cugraph_input_type) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_DIR_INPUT_TYPES) def test_strong_cc(gpubenchmark, dataset_nxresults_strong, cugraph_input_type): @@ -392,9 +382,6 @@ def test_strong_cc(gpubenchmark, dataset_nxresults_strong, assert nx_vertices == cg_vertices -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.NX_DIR_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_strong_cc_nonnative_inputs(gpubenchmark, @@ -405,25 +392,16 @@ def test_strong_cc_nonnative_inputs(gpubenchmark, cugraph_input_type) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_scipy_api_compat_weak(single_dataset_nxresults_weak): (graph_file, _, _, _, api_type) = single_dataset_nxresults_weak assert_scipy_api_compat(graph_file, api_type) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_scipy_api_compat_strong(single_dataset_nxresults_strong): (graph_file, _, _, _, api_type) = single_dataset_nxresults_strong assert_scipy_api_compat(graph_file, api_type) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("connection_type", ["strong", "weak"]) def test_scipy_api_compat(connection_type): if connection_type == "strong": diff --git a/python/cugraph/tests/test_convert_matrix.py b/python/cugraph/tests/test_convert_matrix.py index 4d6c90364d8..1dbf51910ea 100644 --- a/python/cugraph/tests/test_convert_matrix.py +++ b/python/cugraph/tests/test_convert_matrix.py @@ -15,7 +15,6 @@ import pytest import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than import numpy as np # Temporarily suppress warnings till networkX fixes deprecation warnings @@ -37,9 +36,6 @@ def setup_function(): gc.collect() -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_to_from_pandas(graph_file): # Read in the graph @@ -86,9 +82,6 @@ def test_to_from_pandas(graph_file): assert exp_pdf.equals(res_pdf) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_from_to_numpy(graph_file): # Read in the graph @@ -157,9 +150,6 @@ def test_from_to_numpy(graph_file): assert exp_pdf.equals(res_pdf) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_from_edgelist(graph_file): """ @@ -175,9 +165,6 @@ def test_from_edgelist(graph_file): assert G1.EdgeList == G2.EdgeList 
-@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_from_adjlist(graph_file): """ diff --git a/python/cugraph/tests/test_core_number.py b/python/cugraph/tests/test_core_number.py index c2394cdf735..9cfc37ba1c5 100644 --- a/python/cugraph/tests/test_core_number.py +++ b/python/cugraph/tests/test_core_number.py @@ -15,7 +15,6 @@ import pytest import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than from cugraph.utilities import df_score_to_dictionary # Temporarily suppress warnings till networkX fixes deprecation warnings @@ -79,9 +78,6 @@ def calc_core_number(graph_file): # https://github.com/rapidsai/cugraph/issues/1045 # # @pytest.mark.parametrize("graph_file", utils.DATASETS) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_core_number(graph_file): gc.collect() @@ -95,9 +91,6 @@ def test_core_number(graph_file): assert cg_num_dic == nx_num -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_core_number_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_ecg.py b/python/cugraph/tests/test_ecg.py index 60f97715efa..ba705a787ee 100644 --- a/python/cugraph/tests/test_ecg.py +++ b/python/cugraph/tests/test_ecg.py @@ -55,9 +55,6 @@ def golden_call(graph_file): ENSEMBLE_SIZES = [16, 32] -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", DATASETS) @pytest.mark.parametrize("min_weight", MIN_WEIGHTS) @pytest.mark.parametrize("ensemble_size", ENSEMBLE_SIZES) diff --git a/python/cugraph/tests/test_edge_betweenness_centrality.py b/python/cugraph/tests/test_edge_betweenness_centrality.py index 224998df48c..8c5aad7dc61 100644 --- a/python/cugraph/tests/test_edge_betweenness_centrality.py +++ b/python/cugraph/tests/test_edge_betweenness_centrality.py @@ -17,7 +17,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than import random import numpy as np import cupy @@ -301,9 +300,6 @@ def prepare_test(): gc.collect() -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -336,9 +332,6 @@ def test_edge_betweenness_centrality( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", [None]) @@ -380,9 +373,6 @@ def test_edge_betweenness_centrality_k_full( # the function operating the comparison inside is first proceeding # to a random sampling over the number of vertices (thus direct offsets) # in the graph structure instead of actual vertices identifiers -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) @pytest.mark.parametrize("directed", 
DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -419,9 +409,6 @@ def test_edge_betweenness_centrality_fixed_sample( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @@ -460,9 +447,6 @@ def test_edge_betweenness_centrality_weight_except( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @@ -498,9 +482,6 @@ def test_edge_betweenness_invalid_dtype( compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) diff --git a/python/cugraph/tests/test_egonet.py b/python/cugraph/tests/test_egonet.py index fb04674a52b..b259c2567dc 100644 --- a/python/cugraph/tests/test_egonet.py +++ b/python/cugraph/tests/test_egonet.py @@ -17,7 +17,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -36,9 +35,6 @@ RADIUS = [1, 2, 3] -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("seed", SEEDS) @pytest.mark.parametrize("radius", RADIUS) @@ -58,9 +54,6 @@ def test_ego_graph_nx(graph_file, seed, radius): assert nx.is_isomorphic(ego_nx, ego_cugraph) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("seeds", [[0, 5, 13]]) @pytest.mark.parametrize("radius", [1, 2, 3]) diff --git a/python/cugraph/tests/test_filter_unreachable.py b/python/cugraph/tests/test_filter_unreachable.py index f89dbba4e30..6c00461d234 100644 --- a/python/cugraph/tests/test_filter_unreachable.py +++ b/python/cugraph/tests/test_filter_unreachable.py @@ -18,7 +18,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -37,9 +36,6 @@ SOURCES = [1] -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("source", SOURCES) def test_filter_unreachable(graph_file, source): diff --git a/python/cugraph/tests/test_graph.py b/python/cugraph/tests/test_graph.py index 1a032bdaf17..348f7e2e130 100644 --- a/python/cugraph/tests/test_graph.py +++ b/python/cugraph/tests/test_graph.py @@ -23,7 +23,6 @@ from cudf.tests.utils import assert_eq import cugraph from cugraph.tests import utils -from 
cugraph.utilities.utils import is_device_version_less_than # MG import cugraph.dask as dcg @@ -163,9 +162,6 @@ def test_version(): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_add_edge_list_to_adj_list(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -186,9 +182,6 @@ def test_add_edge_list_to_adj_list(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_add_adj_list_to_edge_list(graph_file): Mnx = utils.read_csv_for_nx(graph_file) @@ -215,9 +208,6 @@ def test_add_adj_list_to_edge_list(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_view_edge_list_from_adj_list(graph_file): Mnx = utils.read_csv_for_nx(graph_file) @@ -239,9 +229,6 @@ def test_view_edge_list_from_adj_list(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_delete_edge_list_delete_adj_list(graph_file): Mnx = utils.read_csv_for_nx(graph_file) @@ -270,9 +257,6 @@ def test_delete_edge_list_delete_adj_list(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_add_edge_or_adj_list_after_add_edge_or_adj_list(graph_file): Mnx = utils.read_csv_for_nx(graph_file) @@ -312,9 +296,6 @@ def test_add_edge_or_adj_list_after_add_edge_or_adj_list(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_edges_for_Graph(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -353,9 +334,6 @@ def test_edges_for_Graph(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_view_edge_list_for_Graph(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -399,9 +377,6 @@ def test_view_edge_list_for_Graph(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize('graph_file', utils.DATASETS) def test_consolidation(graph_file): cluster = LocalCUDACluster() @@ -436,9 +411,6 @@ def test_consolidation(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize('graph_file', utils.DATASETS_SMALL) def test_two_hop_neighbors(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -458,9 +430,6 @@ def test_two_hop_neighbors(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_degree_functionality(graph_file): M = utils.read_csv_for_nx(graph_file) @@ -499,9 +468,6 @@ def test_degree_functionality(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_degrees_functionality(graph_file): M = utils.read_csv_for_nx(graph_file) @@ -533,9 +499,6 @@ def 
test_degrees_functionality(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_number_of_vertices(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -554,9 +517,6 @@ def test_number_of_vertices(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) def test_to_directed(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -584,9 +544,6 @@ def test_to_directed(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) def test_to_undirected(graph_file): # Read data and then convert to directed by dropped some edges @@ -621,9 +578,6 @@ def test_to_undirected(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_has_edge(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -639,9 +593,6 @@ def test_has_edge(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_has_node(graph_file): cu_M = utils.read_csv_file(graph_file) @@ -655,9 +606,6 @@ def test_has_node(graph_file): assert G.has_node(n) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_invalid_has_node(): df = cudf.DataFrame([[1, 2]], columns=["src", "dst"]) G = cugraph.Graph() @@ -667,9 +615,6 @@ def test_invalid_has_node(): assert not G.has_node(G.number_of_nodes() + 1) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize('graph_file', utils.DATASETS) def test_bipartite_api(graph_file): # This test only tests the functionality of adding set of nodes and @@ -703,9 +648,6 @@ def test_bipartite_api(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_neighbors(graph_file): cu_M = utils.read_csv_file(graph_file) diff --git a/python/cugraph/tests/test_hits.py b/python/cugraph/tests/test_hits.py index 58c03bebd88..9229f3734f8 100644 --- a/python/cugraph/tests/test_hits.py +++ b/python/cugraph/tests/test_hits.py @@ -20,7 +20,6 @@ import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -80,9 +79,6 @@ def networkx_call(M, max_iter, tol): TOLERANCE = [1.0e-06] -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) diff --git a/python/cugraph/tests/test_hypergraph.py b/python/cugraph/tests/test_hypergraph.py index 9027fdcffd6..43801be9fdc 100644 --- a/python/cugraph/tests/test_hypergraph.py +++ b/python/cugraph/tests/test_hypergraph.py @@ -42,7 +42,6 @@ from cudf.tests.utils import assert_eq import cugraph -from cugraph.utilities.utils import is_device_version_less_than 
simple_df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -59,9 +58,6 @@ })) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_complex_df(): complex_df = pd.DataFrame({ "src": [0, 1, 2, 3], @@ -107,9 +103,6 @@ def test_complex_df(): cugraph.hypergraph(complex_df) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("categorical_metadata", [False, True]) def test_hyperedges(categorical_metadata): @@ -180,9 +173,6 @@ def test_hyperedges(categorical_metadata): assert_eq(len(h[k]), v) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_hyperedges_direct(): h = cugraph.hypergraph(hyper_df, direct=True) @@ -191,9 +181,6 @@ def test_hyperedges_direct(): assert_eq(len(h["nodes"]), 9) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_hyperedges_direct_categories(): h = cugraph.hypergraph( @@ -210,9 +197,6 @@ def test_hyperedges_direct_categories(): assert_eq(len(h["nodes"]), 6) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_hyperedges_direct_manual_shaping(): h1 = cugraph.hypergraph( @@ -230,9 +214,6 @@ def test_hyperedges_direct_manual_shaping(): assert_eq(len(h2["edges"]), 12) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("categorical_metadata", [False, True]) def test_drop_edge_attrs(categorical_metadata): @@ -284,9 +265,6 @@ def test_drop_edge_attrs(categorical_metadata): assert_eq(len(h[k]), v) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("categorical_metadata", [False, True]) def test_drop_edge_attrs_direct(categorical_metadata): @@ -328,9 +306,6 @@ def test_drop_edge_attrs_direct(categorical_metadata): assert_eq(len(h[k]), v) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_skip_hyper(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -345,9 +320,6 @@ def test_skip_hyper(): assert len(hg["graph"].edges()) == 6 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_skip_drop_na_hyper(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -362,9 +334,6 @@ def test_skip_drop_na_hyper(): assert len(hg["graph"].edges()) == 5 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_skip_direct(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -379,9 +348,6 @@ def test_skip_direct(): assert len(hg["graph"].edges()) == 3 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_skip_drop_na_direct(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -396,9 +362,6 @@ def test_skip_drop_na_direct(): assert len(hg["graph"].edges()) == 2 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_drop_na_hyper(): df = cudf.DataFrame.from_pandas( @@ -411,9 +374,6 @@ def test_drop_na_hyper(): assert len(hg["graph"].edges()) == 4 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_drop_na_direct(): df = cudf.DataFrame.from_pandas( @@ -426,9 +386,6 @@ def test_drop_na_direct(): assert len(hg["graph"].edges()) == 1 -@pytest.mark.skipif( - 
is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_skip_na_hyperedge(): nans_df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -448,9 +405,6 @@ def test_skip_na_hyperedge(): assert_eq(len(default_h_edges), len(expected_hits)) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_hyper_to_pa_vanilla(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -465,9 +419,6 @@ def test_hyper_to_pa_vanilla(): assert len(edges_err) == 6 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_hyper_to_pa_mixed(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -482,9 +433,6 @@ def test_hyper_to_pa_mixed(): assert len(edges_err) == 6 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_hyper_to_pa_na(): df = cudf.DataFrame.from_pandas(pd.DataFrame({ @@ -502,9 +450,6 @@ def test_hyper_to_pa_na(): assert len(edges_err) == 6 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_hyper_to_pa_all(): hg = cugraph.hypergraph(simple_df, ["id", "a1", "🙈"]) nodes_arr = hg["graph"].nodes().to_arrow() @@ -515,9 +460,6 @@ def test_hyper_to_pa_all(): assert len(edges_err) == 9 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_hyper_to_pa_all_direct(): hg = cugraph.hypergraph(simple_df, ["id", "a1", "🙈"], direct=True) nodes_arr = hg["graph"].nodes().to_arrow() diff --git a/python/cugraph/tests/test_jaccard.py b/python/cugraph/tests/test_jaccard.py index 87e9a5a3cc7..b61101ef1d0 100644 --- a/python/cugraph/tests/test_jaccard.py +++ b/python/cugraph/tests/test_jaccard.py @@ -17,7 +17,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than from pathlib import PurePath # Temporarily suppress warnings till networkX fixes deprecation warnings @@ -92,9 +91,6 @@ def networkx_call(M): return src, dst, coeff -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_jaccard(graph_file): gc.collect() @@ -117,9 +113,6 @@ def test_jaccard(graph_file): assert err == 0 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", [PurePath( utils.RAPIDS_DATASET_ROOT_DIR)/"netscience.csv"] ) @@ -144,9 +137,6 @@ def test_jaccard_edgevals(graph_file): assert err == 0 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_jaccard_two_hop(graph_file): gc.collect() @@ -179,9 +169,6 @@ def test_jaccard_two_hop(graph_file): assert diff < 1.0e-6 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_jaccard_two_hop_edge_vals(graph_file): gc.collect() @@ -216,9 +203,6 @@ def test_jaccard_two_hop_edge_vals(graph_file): assert diff < 1.0e-6 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_jaccard_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_k_core.py b/python/cugraph/tests/test_k_core.py index 4d3e4903d33..33d403ee27b 100644 --- 
a/python/cugraph/tests/test_k_core.py +++ b/python/cugraph/tests/test_k_core.py @@ -17,7 +17,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -66,9 +65,6 @@ def compare_edges(cg, nxg): return True -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_core_number_Graph(graph_file): gc.collect() @@ -78,9 +74,6 @@ def test_core_number_Graph(graph_file): assert compare_edges(cu_kcore, nx_kcore) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_core_number_Graph_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_k_truss_subgraph.py b/python/cugraph/tests/test_k_truss_subgraph.py index 02b95f01a01..a86490fb561 100644 --- a/python/cugraph/tests/test_k_truss_subgraph.py +++ b/python/cugraph/tests/test_k_truss_subgraph.py @@ -17,7 +17,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than import numpy as np @@ -74,9 +73,6 @@ def compare_k_truss(k_truss_cugraph, k, ground_truth_file): return True -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file, nx_ground_truth", utils.DATASETS_KTRUSS) def test_ktruss_subgraph_Graph(graph_file, nx_ground_truth): gc.collect() @@ -90,9 +86,6 @@ def test_ktruss_subgraph_Graph(graph_file, nx_ground_truth): compare_k_truss(k_subgraph, k, nx_ground_truth) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file, nx_ground_truth", utils.DATASETS_KTRUSS) def test_ktruss_subgraph_Graph_nx(graph_file, nx_ground_truth): gc.collect() diff --git a/python/cugraph/tests/test_katz_centrality.py b/python/cugraph/tests/test_katz_centrality.py index 864b2974117..1fef6b05d59 100644 --- a/python/cugraph/tests/test_katz_centrality.py +++ b/python/cugraph/tests/test_katz_centrality.py @@ -17,7 +17,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -71,9 +70,6 @@ def calc_katz(graph_file): # https://github.com/rapidsai/cugraph/issues/1042 # # @pytest.mark.parametrize("graph_file", utils.DATASETS) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_katz_centrality(graph_file): gc.collect() @@ -86,9 +82,6 @@ def test_katz_centrality(graph_file): assert topKNX.equals(topKCU) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_katz_centrality_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_maximum_spanning_tree.py b/python/cugraph/tests/test_maximum_spanning_tree.py index 0e55c7f15d7..311f28bd6f8 100644 --- a/python/cugraph/tests/test_maximum_spanning_tree.py +++ b/python/cugraph/tests/test_maximum_spanning_tree.py @@ -11,17 +11,17 @@ # See the License for 
the specific language governing permissions and # limitations under the License. +import time import gc import pytest +import numpy as np +import rmm +import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than -import rmm -import cudf -import time -import numpy as np + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -37,9 +37,6 @@ print("Networkx version : {} ".format(nx.__version__)) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS) def test_maximum_spanning_tree_nx(graph_file): gc.collect() @@ -75,9 +72,6 @@ def test_maximum_spanning_tree_nx(graph_file): ] -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.skip(reason="Skipping large tests") @pytest.mark.parametrize("graph_size", DATASETS_SIZES) def test_random_maximum_spanning_tree_nx(graph_size): diff --git a/python/cugraph/tests/test_minimum_spanning_tree.py b/python/cugraph/tests/test_minimum_spanning_tree.py index 15404bc8acf..d1588507bce 100644 --- a/python/cugraph/tests/test_minimum_spanning_tree.py +++ b/python/cugraph/tests/test_minimum_spanning_tree.py @@ -11,17 +11,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import time import gc import pytest +import numpy as np +import rmm +import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than -import rmm -import cudf -import time -import numpy as np + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -37,9 +37,6 @@ print("Networkx version : {} ".format(nx.__version__)) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED_WEIGHTS) def test_minimum_spanning_tree_nx(graph_file): gc.collect() @@ -75,9 +72,6 @@ def test_minimum_spanning_tree_nx(graph_file): ] -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.skip(reason="Skipping large tests") @pytest.mark.parametrize("graph_size", DATASETS_SIZES) def test_random_minimum_spanning_tree_nx(graph_size): diff --git a/python/cugraph/tests/test_modularity.py b/python/cugraph/tests/test_modularity.py index 2956d8f1913..c1ff95042ed 100644 --- a/python/cugraph/tests/test_modularity.py +++ b/python/cugraph/tests/test_modularity.py @@ -19,7 +19,6 @@ import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than def cugraph_call(G, partitions): @@ -53,9 +52,6 @@ def random_call(G, partitions): # Test all combinations of default/managed and pooled/non-pooled allocation -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_modularity_clustering(graph_file, partitions): diff --git a/python/cugraph/tests/test_multigraph.py b/python/cugraph/tests/test_multigraph.py index 62245bcf65d..57be3eb34e8 100644 --- a/python/cugraph/tests/test_multigraph.py +++ b/python/cugraph/tests/test_multigraph.py @@ -11,14 +11,15 @@ # See the 
License for the specific language governing permissions and # limitations under the License. -import cugraph -import networkx as nx -from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than -import pytest import gc + +import pytest +import networkx as nx import numpy as np +import cugraph +from cugraph.tests import utils + # ============================================================================= # Pytest Setup / Teardown - called for each test function @@ -27,9 +28,6 @@ def setup_function(): gc.collect() -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_multigraph(graph_file): # FIXME: Migrate to new test fixtures for Graph setup once available @@ -63,9 +61,6 @@ def test_multigraph(graph_file): assert nxedges.equals(cuedges[["source", "target", "weight"]]) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_Graph_from_MultiGraph(graph_file): # FIXME: Migrate to new test fixtures for Graph setup once available @@ -99,9 +94,6 @@ def test_Graph_from_MultiGraph(graph_file): assert Gnxd.number_of_edges() == Gd.number_of_edges() -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_multigraph_sssp(graph_file): # FIXME: Migrate to new test fixtures for Graph setup once available diff --git a/python/cugraph/tests/test_nx_convert.py b/python/cugraph/tests/test_nx_convert.py index 5799b88157e..98cc8a11dc7 100644 --- a/python/cugraph/tests/test_nx_convert.py +++ b/python/cugraph/tests/test_nx_convert.py @@ -12,11 +12,13 @@ # limitations under the License. 
import gc + import pytest import cudf + import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -49,9 +51,6 @@ def _compare_graphs(nxG, cuG, has_wt=True): assert cu_df.to_dict() == nx_df.to_dict() -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_networkx_compatibility(graph_file): # test to make sure cuGraph and Nx build similar Graphs @@ -81,9 +80,6 @@ def test_networkx_compatibility(graph_file): _compare_graphs(nxG, cuG) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_nx_convert(graph_file): gc.collect() @@ -97,9 +93,6 @@ def test_nx_convert(graph_file): _compare_graphs(nxG, cuG, has_wt=False) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_nx_convert_multicol(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_overlap.py b/python/cugraph/tests/test_overlap.py index 96e510c0294..a0c336c3f16 100644 --- a/python/cugraph/tests/test_overlap.py +++ b/python/cugraph/tests/test_overlap.py @@ -17,9 +17,9 @@ import pytest import numpy as np import scipy + import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than def cugraph_call(cu_M, pairs, edgevals=False): @@ -83,9 +83,6 @@ def cpu_call(M, first, second): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_overlap(graph_file): gc.collect() @@ -120,9 +117,6 @@ def test_overlap(graph_file): # Test -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_overlap_edge_vals(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_pagerank.py b/python/cugraph/tests/test_pagerank.py index 48ab1b39caa..163b2adb967 100644 --- a/python/cugraph/tests/test_pagerank.py +++ b/python/cugraph/tests/test_pagerank.py @@ -20,7 +20,7 @@ import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -145,9 +145,6 @@ def networkx_call(Gnx, max_iter, tol, alpha, personalization_perc, nnz_vtx): # https://github.com/rapidsai/cugraph/issues/533 # -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) @@ -198,9 +195,6 @@ def test_pagerank( assert err < (0.01 * len(cugraph_pr)) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) diff --git a/python/cugraph/tests/test_paths.py b/python/cugraph/tests/test_paths.py index f58195570b8..56cc9b3cd50 100644 
--- a/python/cugraph/tests/test_paths.py +++ b/python/cugraph/tests/test_paths.py @@ -21,7 +21,6 @@ import pytest import cugraph -from cugraph.utilities.utils import is_device_version_less_than CONNECTED_GRAPH = """1,5,3 @@ -75,9 +74,6 @@ def graphs(request): yield cugraph_G, nx_G, cupy_df -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graphs", [CONNECTED_GRAPH], indirect=True) def test_connected_graph_shortest_path_length(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -111,9 +107,6 @@ def test_connected_graph_shortest_path_length(graphs): assert path_1_to_6_length == cugraph.shortest_path_length(cupy_df, 1, 6) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graphs", [CONNECTED_GRAPH], indirect=True) def test_shortest_path_length_invalid_source(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -128,9 +121,6 @@ def test_shortest_path_length_invalid_source(graphs): cugraph.shortest_path_length(cupy_df, -1, 1) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graphs", [DISCONNECTED_GRAPH], indirect=True) def test_shortest_path_length_invalid_target(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -145,9 +135,6 @@ def test_shortest_path_length_invalid_target(graphs): cugraph.shortest_path_length(cupy_df, 1, 10) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graphs", [CONNECTED_GRAPH], indirect=True) def test_shortest_path_length_invalid_vertexes(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -162,9 +149,6 @@ def test_shortest_path_length_invalid_vertexes(graphs): cugraph.shortest_path_length(cupy_df, 0, 42) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graphs", [DISCONNECTED_GRAPH], indirect=True) def test_shortest_path_length_no_path(graphs): cugraph_G, nx_G, cupy_df = graphs @@ -175,9 +159,6 @@ def test_shortest_path_length_no_path(graphs): assert path_1_to_8 == cugraph.shortest_path_length(cupy_df, 1, 8) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graphs", [DISCONNECTED_GRAPH], indirect=True) def test_shortest_path_length_no_target(graphs): cugraph_G, nx_G, cupy_df = graphs diff --git a/python/cugraph/tests/test_renumber.py b/python/cugraph/tests/test_renumber.py index 57912150b12..129bd667621 100644 --- a/python/cugraph/tests/test_renumber.py +++ b/python/cugraph/tests/test_renumber.py @@ -21,12 +21,8 @@ from cugraph.structure.number_map import NumberMap from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_renumber_ips(): source_list = [ "192.168.1.1", @@ -61,9 +57,6 @@ def test_renumber_ips(): assert check_dst.equals(gdf["dest_as_int"]) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_renumber_ips_cols(): source_list = [ @@ -132,9 +125,6 @@ def test_renumber_ips_str_cols(): assert check_dst.equals(gdf["dest_list"]) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_renumber_negative(): source_list = [4, 6, 8, -20, 1] dest_list = [1, 29, 35, 0, 77] @@ -156,9 +146,6 @@ def 
test_renumber_negative(): assert check_dst.equals(gdf["dest_list"]) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_renumber_negative_col(): source_list = [4, 6, 8, -20, 1] dest_list = [1, 29, 35, 0, 77] @@ -180,9 +167,6 @@ def test_renumber_negative_col(): assert check_dst.equals(gdf["dest_list"]) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.skip(reason="dropped renumbering from series support") @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_series(graph_file): @@ -220,9 +204,6 @@ def test_renumber_series(graph_file): assert check_dst["0_y"].equals(check_dst["0_x"]) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_files(graph_file): gc.collect() @@ -256,9 +237,6 @@ def test_renumber_files(graph_file): assert exp_dst.equals(unrenumbered_df["dst"]) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_files_col(graph_file): gc.collect() @@ -291,9 +269,6 @@ def test_renumber_files_col(graph_file): assert exp_dst.equals(unrenumbered_df["dst"]) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_renumber_files_multi_col(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_sssp.py b/python/cugraph/tests/test_sssp.py index 9e866c84f07..9230b7a7b96 100644 --- a/python/cugraph/tests/test_sssp.py +++ b/python/cugraph/tests/test_sssp.py @@ -28,7 +28,7 @@ import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -204,9 +204,6 @@ def single_dataset_source_nxresults_weighted(request): # ============================================================================= # Tests # ============================================================================= -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_DIR_INPUT_TYPES) def test_sssp(gpubenchmark, dataset_source_nxresults, cugraph_input_type): # Extract the params generated from the fixture @@ -236,9 +233,6 @@ def test_sssp(gpubenchmark, dataset_source_nxresults, cugraph_input_type): assert err == 0 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.NX_DIR_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_sssp_nonnative_inputs(gpubenchmark, @@ -249,9 +243,6 @@ def test_sssp_nonnative_inputs(gpubenchmark, cugraph_input_type) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", utils.CUGRAPH_DIR_INPUT_TYPES) def test_sssp_edgevals(gpubenchmark, dataset_source_nxresults_weighted, cugraph_input_type): @@ -286,9 +277,6 @@ def test_sssp_edgevals(gpubenchmark, dataset_source_nxresults_weighted, assert err == 0 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("cugraph_input_type", 
utils.NX_DIR_INPUT_TYPES + utils.MATRIX_INPUT_TYPES) def test_sssp_edgevals_nonnative_inputs( @@ -300,9 +288,6 @@ def test_sssp_edgevals_nonnative_inputs( cugraph_input_type) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("source", SOURCES) def test_sssp_data_type_conversion(graph_file, source): @@ -357,9 +342,6 @@ def test_sssp_data_type_conversion(graph_file, source): assert err == 0 -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_scipy_api_compat(): graph_file = utils.DATASETS[0] diff --git a/python/cugraph/tests/test_subgraph_extraction.py b/python/cugraph/tests/test_subgraph_extraction.py index 5be80f341b5..56c1c23e0ea 100644 --- a/python/cugraph/tests/test_subgraph_extraction.py +++ b/python/cugraph/tests/test_subgraph_extraction.py @@ -19,7 +19,7 @@ import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -71,9 +71,6 @@ def nx_call(M, verts, directed=True): return nx.subgraph(G, verts) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_DiGraph(graph_file): gc.collect() @@ -88,9 +85,6 @@ def test_subgraph_extraction_DiGraph(graph_file): assert compare_edges(cu_sg, nx_sg) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_Graph(graph_file): gc.collect() @@ -105,9 +99,6 @@ def test_subgraph_extraction_Graph(graph_file): assert compare_edges(cu_sg, nx_sg) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_Graph_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_triangle_count.py b/python/cugraph/tests/test_triangle_count.py index d768183e4ad..917a4f320a7 100644 --- a/python/cugraph/tests/test_triangle_count.py +++ b/python/cugraph/tests/test_triangle_count.py @@ -18,7 +18,7 @@ import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -67,9 +67,6 @@ def networkx_call(M): # https://github.com/rapidsai/cugraph/issues/1043 # # @pytest.mark.parametrize("graph_file", utils.DATASETS) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_triangles(graph_file): gc.collect() @@ -80,9 +77,6 @@ def test_triangles(graph_file): assert cu_count == nx_count -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_triangles_edge_vals(graph_file): gc.collect() @@ -93,9 +87,6 @@ def test_triangles_edge_vals(graph_file): assert cu_count == nx_count -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", 
utils.DATASETS_UNDIRECTED) def test_triangles_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_utils.py b/python/cugraph/tests/test_utils.py index b350ef27efd..2ca820271c0 100644 --- a/python/cugraph/tests/test_utils.py +++ b/python/cugraph/tests/test_utils.py @@ -12,16 +12,14 @@ # limitations under the License. import gc +from pathlib import PurePath + import pytest + import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than -from pathlib import PurePath -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_bfs_paths(): with pytest.raises(ValueError) as ErrorMsg: gc.collect() @@ -47,9 +45,6 @@ def test_bfs_paths(): assert "not in the result set" in str(ErrorMsg) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) def test_bfs_paths_array(): with pytest.raises(ValueError) as ErrorMsg: gc.collect() diff --git a/python/cugraph/tests/test_wjaccard.py b/python/cugraph/tests/test_wjaccard.py index f31d65de652..9f82857a8d7 100644 --- a/python/cugraph/tests/test_wjaccard.py +++ b/python/cugraph/tests/test_wjaccard.py @@ -20,7 +20,7 @@ import cudf import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -87,9 +87,6 @@ def networkx_call(M): return coeff -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_wjaccard(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_woverlap.py b/python/cugraph/tests/test_woverlap.py index 50d7b0ecf84..b6ceda40116 100644 --- a/python/cugraph/tests/test_woverlap.py +++ b/python/cugraph/tests/test_woverlap.py @@ -16,11 +16,11 @@ import pytest import scipy +import numpy as np import cudf + import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than -import numpy as np def cugraph_call(cu_M, pairs): @@ -84,9 +84,6 @@ def cpu_call(M, first, second): return result -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_woverlap(graph_file): gc.collect() From 48bf0588aceb31ae9e2cf56ee747a121af357e35 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Fri, 2 Apr 2021 19:39:29 -0400 Subject: [PATCH 42/51] Reduce the size of the cugraph libraries (#1503) By explicitly telling nvcc's fatbin pass to always compress device code we can ensure that our binaries are the smallest possible size. See https://github.com/rapidsai/cudf/pull/7583 for additional context. 
Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Rick Ratzel (https://github.com/rlratzel) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1503 --- cpp/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3b1e93d0781..50a5d80d685 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -116,6 +116,7 @@ set(FAISS_GPU_ARCHS "${FAISS_GPU_ARCHS} -gencode arch=compute_${ptx},code=comput set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas --disable-warnings") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable") +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin=-compress-all") # Option to enable line info in CUDA device compilation to allow introspection when profiling / # memchecking From a6edf62469c44f5b50c3a4558a2167f7a89f46fe Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Mon, 5 Apr 2021 18:14:03 -0500 Subject: [PATCH 43/51] Updating RAFT tag (#1509) A PR was merged into RAFT today; this change updates the pinned RAFT tag to the most recent commit hash: f0cd81fb49638eaddc9bf18998cc894f292bc293. Authors: - Alex Fender (https://github.com/afender) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1509 --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 50a5d80d685..1997fd75dab 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -302,7 +302,7 @@ else(DEFINED ENV{RAFT_PATH}) FetchContent_Declare( raft GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG 6455e05b3889db2b495cf3189b33c2b07bfbebf2 + GIT_TAG f0cd81fb49638eaddc9bf18998cc894f292bc293 SOURCE_SUBDIR raft ) From 9a1ab0921972b7c1c7f6ace78dd01e16551d7b41 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Tue, 6 Apr 2021 10:00:34 -0400 Subject: [PATCH 44/51] Update graph partitioning scheme (#1443) Partially addresses Issue #1442. Updates the graph partitioning scheme to better control memory footprint vs. concurrency trade-offs for large-scale graph processing in large clusters. This new partitioning scheme also simplifies communication patterns among GPUs, which can potentially improve scalability.
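To make the new routing concrete, here is a minimal host-side sketch (editorial, not part of the patch itself). The PR reworks the compute_gpu_id_from_edge_t functor and adds a compute_partition_id_from_edge_t functor, both in cpp/include/experimental/detail/graph_utils.cuh: the first maps an edge (major, minor) to the GPU that should receive it after shuffling, the second to one of that GPU's local adjacency-matrix partitions. The sketch only mirrors their arithmetic, assuming comm_size == row_comm_size * col_comm_size (the usual 2D process-grid setup); the real functors are __device__ functors hashing with cuco::detail::MurmurHash3_32, for which std::hash is a readability stand-in here, and the 8-GPU grid and edge endpoints in main() are made-up example values.

// Host-only editorial sketch of the edge-routing arithmetic changed/added by this PR.
// std::hash stands in for the cuco::detail::MurmurHash3_32 used by the real device functors,
// so the concrete IDs printed here will not match the library's.
#include <cstdint>
#include <functional>
#include <iostream>

using vertex_t = int64_t;

// Assumes comm_size == row_comm_size * col_comm_size (GPUs arranged as a 2D grid).
int gpu_id_from_edge(vertex_t major, vertex_t minor, int comm_size, int row_comm_size)
{
  std::hash<vertex_t> h{};
  int major_comm_rank = static_cast<int>(h(major) % comm_size);
  int minor_comm_rank = static_cast<int>(h(minor) % comm_size);
  // Mirrors the updated compute_gpu_id_from_edge_t: the hashed minor vertex picks which block
  // of row_comm_size consecutive ranks, the hashed major vertex picks the rank within that block.
  return (minor_comm_rank / row_comm_size) * row_comm_size + (major_comm_rank % row_comm_size);
}

int partition_id_from_edge(
  vertex_t major, vertex_t minor, int comm_size, int row_comm_size, int col_comm_size)
{
  std::hash<vertex_t> h{};
  int major_comm_rank = static_cast<int>(h(major) % comm_size);
  int minor_comm_rank = static_cast<int>(h(minor) % comm_size);
  // Mirrors the new compute_partition_id_from_edge_t: one of comm_size * col_comm_size global
  // matrix partitions (col_comm_size of them per process).
  return major_comm_rank * col_comm_size + minor_comm_rank / row_comm_size;
}

int main()
{
  int const comm_size = 8, row_comm_size = 4, col_comm_size = 2;  // hypothetical 8-GPU cluster
  vertex_t const major = 123456, minor = 654321;                  // hypothetical edge endpoints
  std::cout << "edge (" << major << "," << minor << ") -> GPU "
            << gpu_id_from_edge(major, minor, comm_size, row_comm_size) << ", partition "
            << partition_id_from_edge(major, minor, comm_size, row_comm_size, col_comm_size)
            << '\n';
  return 0;
}

Under this scheme each process holds col_comm_size local adjacency-matrix partitions (matching the loop over adj_matrix_partition_offsets in the updated compute_major_degrees further down), which is what the revised doxygen comments mean by shuffling edges to their "final target process & matrix partition".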
Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Alex Fender (https://github.com/afender) - Andrei Schaffer (https://github.com/aschaffer) URL: https://github.com/rapidsai/cugraph/pull/1443 --- cpp/CMakeLists.txt | 1 + cpp/include/dendrogram.hpp | 2 +- .../experimental/detail/graph_utils.cuh | 80 +- cpp/include/experimental/graph.hpp | 14 + cpp/include/experimental/graph_functions.hpp | 170 +++- cpp/include/experimental/graph_view.hpp | 102 +-- cpp/include/matrix_partition_device.cuh | 2 +- .../patterns/copy_to_adj_matrix_row_col.cuh | 436 ++++----- .../copy_v_transform_reduce_in_out_nbr.cuh | 130 +-- ...ransform_reduce_key_aggregated_out_nbr.cuh | 246 +++-- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 31 +- .../update_frontier_v_push_if_out_nbr.cuh | 256 ++---- cpp/include/patterns/vertex_frontier.cuh | 109 +-- cpp/include/utilities/collect_comm.cuh | 179 +++- cpp/include/utilities/cython.hpp | 28 +- cpp/include/utilities/dataframe_buffer.cuh | 29 +- cpp/include/utilities/device_comm.cuh | 100 ++ cpp/include/utilities/shuffle_comm.cuh | 175 ++-- cpp/include/utilities/thrust_tuple_utils.cuh | 17 - cpp/src/community/ecg.cu | 2 +- cpp/src/community/leiden.cuh | 2 +- cpp/src/community/louvain.cuh | 2 +- cpp/src/experimental/bfs.cu | 16 +- cpp/src/experimental/coarsen_graph.cu | 422 +++++---- .../experimental/generate_rmat_edgelist.cu | 4 +- cpp/src/experimental/graph.cu | 82 +- cpp/src/experimental/graph_view.cu | 186 +++- cpp/src/experimental/louvain.cuh | 34 +- cpp/src/experimental/relabel.cu | 60 +- cpp/src/experimental/renumber_edgelist.cu | 851 ++++++++++-------- cpp/src/experimental/renumber_utils.cu | 477 ++++++++++ cpp/src/experimental/sssp.cu | 16 +- cpp/src/utilities/cython.cu | 265 ++++-- cpp/tests/CMakeLists.txt | 33 +- cpp/tests/community/egonet_test.cu | 7 +- cpp/tests/community/mg_louvain_helper.cu | 3 +- cpp/tests/community/mg_louvain_test.cpp | 21 +- cpp/tests/experimental/bfs_test.cpp | 245 +++-- cpp/tests/experimental/coarsen_graph_test.cpp | 5 +- cpp/tests/experimental/generate_rmat_test.cpp | 23 +- cpp/tests/experimental/graph_test.cpp | 2 +- .../experimental/katz_centrality_test.cpp | 245 +++-- cpp/tests/experimental/mg_bfs_test.cpp | 303 +++++++ .../experimental/mg_katz_centrality_test.cpp | 268 ++++++ cpp/tests/experimental/mg_sssp_test.cpp | 314 +++++++ cpp/tests/experimental/pagerank_test.cpp | 270 ++++-- cpp/tests/experimental/rw_low_level_test.cu | 21 +- cpp/tests/experimental/sssp_test.cpp | 267 ++++-- cpp/tests/pagerank/mg_pagerank_test.cpp | 400 ++++---- .../utilities/generate_graph_from_edgelist.cu | 199 ++-- .../utilities/matrix_market_file_utilities.cu | 69 ++ cpp/tests/utilities/rmat_utilities.cu | 733 +++++++++------ cpp/tests/utilities/test_utilities.hpp | 50 +- cpp/tests/utilities/thrust_wrapper.cu | 81 ++ cpp/tests/utilities/thrust_wrapper.hpp | 30 + python/cugraph/community/egonet_wrapper.pyx | 7 +- .../centrality/mg_katz_centrality_wrapper.pyx | 11 +- .../dask/community/louvain_wrapper.pyx | 7 +- .../link_analysis/mg_pagerank_wrapper.pyx | 11 +- .../cugraph/dask/traversal/mg_bfs_wrapper.pyx | 5 +- .../dask/traversal/mg_sssp_wrapper.pyx | 7 +- .../link_analysis/pagerank_wrapper.pyx | 8 +- python/cugraph/structure/graph_utilities.pxd | 24 +- python/cugraph/structure/renumber_wrapper.pyx | 127 +-- 64 files changed, 5481 insertions(+), 2841 deletions(-) create mode 100644 cpp/src/experimental/renumber_utils.cu create mode 100644 cpp/tests/experimental/mg_bfs_test.cpp create mode 
100644 cpp/tests/experimental/mg_katz_centrality_test.cpp create mode 100644 cpp/tests/experimental/mg_sssp_test.cpp create mode 100644 cpp/tests/utilities/thrust_wrapper.cu create mode 100644 cpp/tests/utilities/thrust_wrapper.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 1997fd75dab..5a3cb65caa5 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -432,6 +432,7 @@ add_library(cugraph SHARED src/experimental/graph_view.cu src/experimental/coarsen_graph.cu src/experimental/renumber_edgelist.cu + src/experimental/renumber_utils.cu src/experimental/relabel.cu src/experimental/induced_subgraph.cu src/experimental/bfs.cu diff --git a/cpp/include/dendrogram.hpp b/cpp/include/dendrogram.hpp index bb9ba470a52..aa0802e80b3 100644 --- a/cpp/include/dendrogram.hpp +++ b/cpp/include/dendrogram.hpp @@ -27,7 +27,7 @@ class Dendrogram { public: void add_level(vertex_t first_index, vertex_t num_verts, - cudaStream_t stream = 0, + cudaStream_t stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) { level_ptr_.push_back(std::make_unique>(num_verts, stream, mr)); diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index 084d68b8ba4..d79788e59ce 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -56,65 +56,32 @@ rmm::device_uvector compute_major_degrees( rmm::device_uvector degrees(0, handle.get_stream()); vertex_t max_num_local_degrees{0}; - for (int i = 0; i < (partition.is_hypergraph_partitioned() ? col_comm_size : row_comm_size); - ++i) { - auto vertex_partition_idx = partition.is_hypergraph_partitioned() - ? static_cast(i * row_comm_size + row_comm_rank) - : static_cast(col_comm_rank * row_comm_size + i); + for (int i = 0; i < col_comm_size; ++i) { + auto vertex_partition_idx = static_cast(i * row_comm_size + row_comm_rank); auto vertex_partition_size = partition.get_vertex_partition_size(vertex_partition_idx); max_num_local_degrees = std::max(max_num_local_degrees, vertex_partition_size); - if (i == (partition.is_hypergraph_partitioned() ? col_comm_rank : row_comm_rank)) { - degrees.resize(vertex_partition_size, handle.get_stream()); - } + if (i == col_comm_rank) { degrees.resize(vertex_partition_size, handle.get_stream()); } } local_degrees.resize(max_num_local_degrees, handle.get_stream()); - for (int i = 0; i < (partition.is_hypergraph_partitioned() ? col_comm_size : row_comm_size); - ++i) { - auto vertex_partition_idx = partition.is_hypergraph_partitioned() - ? static_cast(i * row_comm_size + row_comm_rank) - : static_cast(col_comm_rank * row_comm_size + i); + for (int i = 0; i < col_comm_size; ++i) { + auto vertex_partition_idx = static_cast(i * row_comm_size + row_comm_rank); vertex_t major_first{}; vertex_t major_last{}; std::tie(major_first, major_last) = partition.get_vertex_partition_range(vertex_partition_idx); - auto p_offsets = - partition.is_hypergraph_partitioned() - ? 
adj_matrix_partition_offsets[i] - : adj_matrix_partition_offsets[0] + - (major_first - partition.get_vertex_partition_first(col_comm_rank * row_comm_size)); + auto p_offsets = adj_matrix_partition_offsets[i]; thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(major_last - major_first), local_degrees.data(), [p_offsets] __device__(auto i) { return p_offsets[i + 1] - p_offsets[i]; }); - if (partition.is_hypergraph_partitioned()) { - col_comm.reduce(local_degrees.data(), - i == col_comm_rank ? degrees.data() : static_cast(nullptr), - static_cast(major_last - major_first), - raft::comms::op_t::SUM, - i, - handle.get_stream()); - } else { - row_comm.reduce(local_degrees.data(), - i == row_comm_rank ? degrees.data() : static_cast(nullptr), - static_cast(major_last - major_first), - raft::comms::op_t::SUM, - i, - handle.get_stream()); - } + col_comm.reduce(local_degrees.data(), + i == col_comm_rank ? degrees.data() : static_cast(nullptr), + static_cast(major_last - major_first), + raft::comms::op_t::SUM, + i, + handle.get_stream()); } - raft::comms::status_t status{}; - if (partition.is_hypergraph_partitioned()) { - status = - col_comm.sync_stream(handle.get_stream()); // this is neessary as local_degrees will become - // out-of-scope once this function returns. - } else { - status = - row_comm.sync_stream(handle.get_stream()); // this is neessary as local_degrees will become - // out-of-scope once this function returns. - } - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - return degrees; } @@ -170,7 +137,6 @@ struct compute_gpu_id_from_vertex_t { template struct compute_gpu_id_from_edge_t { - bool hypergraph_partitioned{false}; int comm_size{0}; int row_comm_size{0}; int col_comm_size{0}; @@ -180,12 +146,22 @@ struct compute_gpu_id_from_edge_t { cuco::detail::MurmurHash3_32 hash_func{}; auto major_comm_rank = static_cast(hash_func(major) % comm_size); auto minor_comm_rank = static_cast(hash_func(minor) % comm_size); - if (hypergraph_partitioned) { - return (minor_comm_rank / col_comm_size) * row_comm_size + (major_comm_rank % row_comm_size); - } else { - return (major_comm_rank - (major_comm_rank % row_comm_size)) + - (minor_comm_rank / col_comm_size); - } + return (minor_comm_rank / row_comm_size) * row_comm_size + (major_comm_rank % row_comm_size); + } +}; + +template +struct compute_partition_id_from_edge_t { + int comm_size{0}; + int row_comm_size{0}; + int col_comm_size{0}; + + __device__ int operator()(vertex_t major, vertex_t minor) const + { + cuco::detail::MurmurHash3_32 hash_func{}; + auto major_comm_rank = static_cast(hash_func(major) % comm_size); + auto minor_comm_rank = static_cast(hash_func(minor) % comm_size); + return major_comm_rank * col_comm_size + minor_comm_rank / row_comm_size; } }; diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/experimental/graph.hpp index 6a10256e6f4..a380200ea1f 100644 --- a/cpp/include/experimental/graph.hpp +++ b/cpp/include/experimental/graph.hpp @@ -188,6 +188,20 @@ template struct invalid_edge_id : invalid_idx { }; +template +__host__ __device__ std::enable_if_t::value, bool> is_valid_vertex( + vertex_t num_vertices, vertex_t v) +{ + return (v >= 0) && (v < num_vertices); +} + +template +__host__ __device__ std::enable_if_t::value, bool> is_valid_vertex( + vertex_t num_vertices, vertex_t v) +{ + return v < num_vertices; +} + } // namespace experimental } // namespace cugraph diff --git 
a/cpp/include/experimental/graph_functions.hpp b/cpp/include/experimental/graph_functions.hpp index 7b4bb466b97..100742adccd 100644 --- a/cpp/include/experimental/graph_functions.hpp +++ b/cpp/include/experimental/graph_functions.hpp @@ -17,13 +17,13 @@ #include #include -#include #include #include #include #include +#include namespace cugraph { namespace experimental { @@ -40,19 +40,24 @@ namespace experimental { * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. - * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as + * @param edgelist_major_vertices Pointers (one pointer per local graph adjacency matrix partition + * assigned to this process) to edge source vertex IDs (if the graph adjacency matrix is stored as * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex - * IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t functor to - * every (major, minor) pair should return the local GPU ID for this function to work (edges should - * be pre-shuffled). - * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is - * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). - * Vertex IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t - * functor to every (major, minor) pair should return the local GPU ID for this function to work - * (edges should be pre-shuffled). - * @param num_edgelist_edges Number of edges in the edgelist. - * @param is_hypergraph_partitioned Flag indicating whether we are assuming hypergraph partitioning - * (this flag will be removed in the future). + * IDs are updated in-place ([INOUT] parameter). Edges should be pre-shuffled to their final target + * process & matrix partition; i.e. applying the compute_gpu_id_from_edge_t functor to every (major, + * minor) pair should return the GPU ID of this process and applying the + * compute_partition_id_from_edge_t fuctor to every (major, minor) pair for a local matrix partition + * should return the partition ID of the corresponding matrix partition. + * @param edgelist_minor_vertices Pointers (one pointer per local graph adjacency matrix partition + * assigned to this process) to edge destination vertex IDs (if the graph adjacency matrix is stored + * as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). Vertex IDs + * are updated in-place ([INOUT] parameter). Edges should be pre-shuffled to their final target + * process & matrix partition; i.e. applying the compute_gpu_id_from_edge_t functor to every (major, + * minor) pair should return the GPU ID of this process and applying the + * compute_partition_id_from_edge_t fuctor to every (major, minor) pair for a local matrix partition + * should return the partition ID of the corresponding matrix partition. + * @param edgelist_edge_counts Edge counts (one count per local graph adjacency matrix partition + * assigned to this process). * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). 
* @return std::tuple, partition_t, vertex_t, edge_t> * Quadruplet of labels (vertex IDs before renumbering) for the entire set of vertices (assigned to @@ -63,10 +68,9 @@ template std::enable_if_t, partition_t, vertex_t, edge_t>> renumber_edgelist(raft::handle_t const& handle, - vertex_t* edgelist_major_vertices /* [INOUT] */, - vertex_t* edgelist_minor_vertices /* [INOUT] */, - edge_t num_edgelist_edges, - bool is_hypergraph_partitioned, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, bool do_expensive_check = false); /** @@ -115,19 +119,24 @@ std::enable_if_t> renumber_edgelist( * the compute_gpu_id_from_vertex_t to every vertex should return the local GPU ID for this function * to work (vertices should be pre-shuffled). * @param num_local_vertices Number of local vertices. - * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as + * @param edgelist_major_vertices Pointers (one pointer per local graph adjacency matrix partition + * assigned to this process) to edge source vertex IDs (if the graph adjacency matrix is stored as * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex - * IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t functor to - * every (major, minor) pair should return the local GPU ID for this function to work (edges should - * be pre-shuffled). - * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is - * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). - * Vertex IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t - * functor to every (major, minor) pair should return the local GPU ID for this function to work - * (edges should be pre-shuffled). - * @param num_edgelist_edges Number of edges in the edgelist. - * @param is_hypergraph_partitioned Flag indicating whether we are assuming hypergraph partitioning - * (this flag will be removed in the future). + * IDs are updated in-place ([INOUT] parameter). Edges should be pre-shuffled to their final target + * process & matrix partition; i.e. applying the compute_gpu_id_from_edge_t functor to every (major, + * minor) pair should return the GPU ID of this process and applying the + * compute_partition_id_from_edge_t fuctor to every (major, minor) pair for a local matrix partition + * should return the partition ID of the corresponding matrix partition. + * @param edgelist_minor_vertices Pointers (one pointer per local graph adjacency matrix partition + * assigned to this process) to edge destination vertex IDs (if the graph adjacency matrix is stored + * as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). Vertex IDs + * are updated in-place ([INOUT] parameter). Edges should be pre-shuffled to their final target + * process & matrix partition; i.e. applying the compute_gpu_id_from_edge_t functor to every (major, + * minor) pair should return the GPU ID of this process and applying the + * compute_partition_id_from_edge_t fuctor to every (major, minor) pair for a local matrix partition + * should return the partition ID of the corresponding matrix partition. + * @param edgelist_edge_counts Edge counts (one count per local graph adjacency matrix partition + * assigned to this process). 
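A minimal host-side sketch of the edge placement rule described above; it mirrors the arithmetic of compute_gpu_id_from_edge_t and compute_partition_id_from_edge_t from this patch, with std::hash standing in for the device-side cuco MurmurHash3_32 (so concrete rank values differ; only the indexing scheme is illustrated). The struct and member names are illustrative, not part of the patch.

// Sketch only: host mirror of the edge-to-GPU and edge-to-partition mapping.
// std::hash is a stand-in for cuco::detail::MurmurHash3_32 used on the device.
#include <cstdint>
#include <functional>

struct edge_placement_sketch_t {
  int comm_size{0};      // P = row_comm_size * col_comm_size
  int row_comm_size{0};
  int col_comm_size{0};

  int gpu_id(int64_t major, int64_t minor) const
  {
    auto major_rank = static_cast<int>(std::hash<int64_t>{}(major) % comm_size);
    auto minor_rank = static_cast<int>(std::hash<int64_t>{}(minor) % comm_size);
    // same expression as compute_gpu_id_from_edge_t::operator()
    return (minor_rank / row_comm_size) * row_comm_size + (major_rank % row_comm_size);
  }

  int partition_id(int64_t major, int64_t minor) const
  {
    auto major_rank = static_cast<int>(std::hash<int64_t>{}(major) % comm_size);
    auto minor_rank = static_cast<int>(std::hash<int64_t>{}(minor) % comm_size);
    // same expression as compute_partition_id_from_edge_t::operator()
    return major_rank * col_comm_size + minor_rank / row_comm_size;
  }
};

// A pre-shuffled edge list then satisfies gpu_id(major, minor) == comm_rank for every edge
// held by this process, and partition_id(major, minor) is constant within each local
// adjacency matrix partition.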
* @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return std::tuple, partition_t, vertex_t, edge_t> * Quadruplet of labels (vertex IDs before renumbering) for the entire set of vertices (assigned to @@ -140,10 +149,9 @@ std::enable_if_t const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, bool do_expensive_check = false); /** @@ -181,6 +189,102 @@ std::enable_if_t> renumber_edgelist( edge_t num_edgelist_edges, bool do_expensive_check = false); +/** + * @brief Renumber external vertices to internal vertices based on the provoided @p + * renumber_map_labels. + * + * Note cugraph::experimental::invalid_id::value remains unchanged. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param vertices Pointer to the vertices to be renumbered. The input external vertices are + * renumbered to internal vertices in-place. + * @param num_vertices Number of vertices to be renumbered. + * @param renumber_map_labels Pointer to the external vertices corresponding to the internal + * vertices in the range [@p local_int_vertex_first, @p local_int_vertex_last). + * @param local_int_vertex_first The first local internal vertex (inclusive, assigned to this + * process in multi-GPU). + * @param local_int_vertex_last The last local internal vertex (exclusive, assigned to this process + * in multi-GPU). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + */ +template +void renumber_ext_vertices(raft::handle_t const& handle, + vertex_t* vertices /* [INOUT] */, + size_t num_vertices, + vertex_t const* renumber_map_labels, + vertex_t local_int_vertex_first, + vertex_t local_int_vertex_last, + bool do_expensive_check = false); + +/** + * @brief Unrenumber local internal vertices to external vertices based on the providied @p + * renumber_map_labels. + * + * Note cugraph::experimental::invalid_id::value remains unchanged. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param vertices Pointer to the local internal vertices to be unrenumbered. Each input element + * should be in [@p local_int_vertex_first, @p local_int_vertex_last). The input internal vertices + * are renumbered to external vertices in-place. + * @param num_vertices Number of vertices to be unrenumbered. + * @param renumber_map_labels Pointer to the external vertices corresponding to the internal + * vertices in the range [@p local_int_vertex_first, @p local_int_vertex_last). + * @param local_int_vertex_first The first local internal vertex (inclusive, assigned to this + * process in multi-GPU). + * @param local_int_vertex_last The last local internal vertex (exclusive, assigned to this process + * in multi-GPU). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). 
+ */ +template +void unrenumber_local_int_vertices( + raft::handle_t const& handle, + vertex_t* vertices /* [INOUT] */, + size_t num_vertices, + vertex_t const* renumber_map_labels /* size = local_int_vertex_last - local_int_vertex_first */, + vertex_t local_int_vertex_first, + vertex_t local_int_vertex_last, + bool do_expensive_check = false); + +/** + * @brief Unrenumber (possibly non-local) internal vertices to external vertices based on the + * providied @p renumber_map_labels. + * + * Note cugraph::experimental::invalid_id::value remains unchanged. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param vertices Pointer to the internal vertices to be unrenumbered. The input internal vertices + * are renumbered to external vertices in-place. + * @param num_vertices Number of vertices to be unrenumbered. + * @param renumber_map_labels Pointer to the external vertices corresponding to the internal + * vertices in the range [@p local_int_vertex_first, @p local_int_vertex_last). + * @param local_int_vertex_first The first local internal vertex (inclusive, assigned to this + * process in multi-GPU). + * @param local_int_vertex_last The last local internal vertex (exclusive, assigned to this process + * in multi-GPU). + * @param vertex_partition_lasts Last local internal vertices (exclusive, assigned to each process + * in multi-GPU). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + */ +template +void unrenumber_int_vertices(raft::handle_t const& handle, + vertex_t* vertices /* [INOUT] */, + size_t num_vertices, + vertex_t const* renumber_map_labels, + vertex_t local_int_vertex_first, + vertex_t local_int_vertex_last, + std::vector& vertex_partition_lasts, + bool do_expensive_check = false); + /** * @brief Compute the coarsened graph. * diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index 5d3d09bb087..47c93b42ca9 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -40,32 +40,11 @@ namespace experimental { * * We need to partition 1D vertex arrays (storing per vertex values) and the 2D graph adjacency * matrix (or transposed 2D graph adjacency matrix) of G. An 1D vertex array of size V is divided to - * P linear partitions; each partition has the size close to V / P. We consider two different - * strategies to partition the 2D matrix: the default strategy and the hypergraph partitioning based - * strategy (the latter is for future extension). - * FIXME: in the future we may use the latter for both as this leads to simpler communication - * patterns and better control over parallelism vs memory footprint trade-off. + * P linear partitions; each partition has the size close to V / P. * - * In the default case, one GPU will be responsible for 1 rectangular partition. The matrix will be - * horizontally partitioned first to P_row slabs. Each slab will be further vertically partitioned - * to P_col rectangles. Each rectangular partition will have the size close to V / P_row by V / - * P_col. 
- * - * To be more specific, a GPU with (col_comm_rank, row_comm_rank) will be responsible for one - * rectangular partition [a,b) by [c,d) where a = vertex_partition_offsets[row_comm_size * - * col_comm_rank], b = vertex_partition_offsets[row_comm_size * (col_comm_rank + 1)], c = - * vertex_partition_offsets[col_comm_size * row_comm_rank], and d = - * vertex_partition_offsets[col_comm_size * (row_comm_rank + 1)]. - * - * In the future, we may apply hyper-graph partitioning to divide V vertices to P groups minimizing - * edge cuts across groups while balancing the number of vertices in each group. We will also - * renumber vertices so the vertices in each group are mapped to consecutive integers. Then, there - * will be more non-zeros in the diagonal partitions of the 2D graph adjacency matrix (or the - * transposed 2D graph adjacency matrix) than the off-diagonal partitions. The default strategy does - * not balance the number of nonzeros if hyper-graph partitioning is applied. To solve this problem, - * the matrix is first horizontally partitioned to P slabs, then each slab will be further - * vertically partitioned to P_row (instead of P_col in the default case) rectangles. One GPU will - * be responsible col_comm_size rectangular partitions in this case. + * The 2D graph adjacency matrix is first horizontally partitioned to P slabs, then each slab will + * be further vertically partitioned to P_row (instead of P_col in the default case) rectangles. One + * GPU will be responsible col_comm_size rectangular partitions. * * To be more specific, a GPU with (col_comm_rank, row_comm_rank) will be responsible for * col_comm_size rectangular partitions [a_i,b_i) by [c,d) where a_i = @@ -85,13 +64,11 @@ class partition_t { partition_t() = default; partition_t(std::vector const& vertex_partition_offsets, - bool hypergraph_partitioned, int row_comm_size, int col_comm_size, int row_comm_rank, int col_comm_rank) : vertex_partition_offsets_(vertex_partition_offsets), - hypergraph_partitioned_(hypergraph_partitioned), comm_rank_(col_comm_rank * row_comm_size + row_comm_rank), row_comm_size_(row_comm_size), col_comm_size_(col_comm_size), @@ -159,10 +136,7 @@ class partition_t { get_vertex_partition_first(vertex_partition_idx); } - size_t get_number_of_matrix_partitions() const - { - return hypergraph_partitioned_ ? col_comm_size_ : 1; - } + size_t get_number_of_matrix_partitions() const { return col_comm_size_; } // major: row of the graph adjacency matrix (if the graph adjacency matrix is stored as is) or // column of the graph adjacency matrix (if the transposed graph adjacency matrix is stored). @@ -175,16 +149,18 @@ class partition_t { vertex_t get_matrix_partition_major_first(size_t partition_idx) const { - return hypergraph_partitioned_ - ? vertex_partition_offsets_[row_comm_size_ * partition_idx + row_comm_rank_] - : vertex_partition_offsets_[col_comm_rank_ * row_comm_size_]; + return vertex_partition_offsets_[row_comm_size_ * partition_idx + row_comm_rank_]; } vertex_t get_matrix_partition_major_last(size_t partition_idx) const { - return hypergraph_partitioned_ - ? 
vertex_partition_offsets_[row_comm_size_ * partition_idx + row_comm_rank_ + 1] - : vertex_partition_offsets_[(col_comm_rank_ + 1) * row_comm_size_]; + return vertex_partition_offsets_[row_comm_size_ * partition_idx + row_comm_rank_ + 1]; + } + + vertex_t get_matrix_partition_major_size(size_t partition_idx) const + { + return get_matrix_partition_major_last(partition_idx) - + get_matrix_partition_major_first(partition_idx); } vertex_t get_matrix_partition_major_value_start_offset(size_t partition_idx) const @@ -204,24 +180,21 @@ class partition_t { vertex_t get_matrix_partition_minor_first() const { - return hypergraph_partitioned_ ? vertex_partition_offsets_[col_comm_rank_ * row_comm_size_] - : vertex_partition_offsets_[row_comm_rank_ * col_comm_size_]; + return vertex_partition_offsets_[col_comm_rank_ * row_comm_size_]; } vertex_t get_matrix_partition_minor_last() const { - return hypergraph_partitioned_ - ? vertex_partition_offsets_[(col_comm_rank_ + 1) * row_comm_size_] - : vertex_partition_offsets_[(row_comm_rank_ + 1) * col_comm_size_]; + return vertex_partition_offsets_[(col_comm_rank_ + 1) * row_comm_size_]; } - // FIXME: this function may be removed if we use the same partitioning strategy whether hypergraph - // partitioning is applied or not - bool is_hypergraph_partitioned() const { return hypergraph_partitioned_; } + vertex_t get_matrix_partition_minor_size() const + { + return get_matrix_partition_minor_last() - get_matrix_partition_minor_first(); + } private: std::vector vertex_partition_offsets_{}; // size = P + 1 - bool hypergraph_partitioned_{false}; int comm_rank_{0}; int row_comm_size_{0}; @@ -236,6 +209,7 @@ class partition_t { struct graph_properties_t { bool is_symmetric{false}; bool is_multigraph{false}; + bool is_weighted{false}; }; namespace detail { @@ -277,6 +251,7 @@ class graph_base_t { bool is_symmetric() const { return properties_.is_symmetric; } bool is_multigraph() const { return properties_.is_multigraph; } + bool is_weighted() const { return properties_.is_weighted; } protected: raft::handle_t const* get_handle_ptr() const { return handle_ptr_; }; @@ -334,11 +309,6 @@ class graph_view_t 0; } - - // FIXME: this should be removed once MNMG Louvain is updated to use graph primitives - partition_t get_partition() const { return partition_; } - vertex_t get_number_of_local_vertices() const { return partition_.get_local_vertex_last() - partition_.get_local_vertex_first(); @@ -421,6 +391,12 @@ class graph_view_t compute_in_weight_sums(raft::handle_t const& handle) const; rmm::device_uvector compute_out_weight_sums(raft::handle_t const& handle) const; + edge_t compute_max_in_degree(raft::handle_t const& handle) const; + edge_t compute_max_out_degree(raft::handle_t const& handle) const; + + weight_t compute_max_in_weight_sum(raft::handle_t const& handle) const; + weight_t compute_max_out_weight_sum(raft::handle_t const& handle) const; + private: std::vector adj_matrix_partition_offsets_{}; std::vector adj_matrix_partition_indices_{}; @@ -549,8 +535,6 @@ class graph_view_tget_number_of_vertices(); } constexpr vertex_t get_local_vertex_first() const { return vertex_t{0}; } @@ -628,8 +612,6 @@ class graph_view_t compute_in_weight_sums(raft::handle_t const& handle) const; rmm::device_uvector compute_out_weight_sums(raft::handle_t const& handle) const; + edge_t compute_max_in_degree(raft::handle_t const& handle) const; + edge_t compute_max_out_degree(raft::handle_t const& handle) const; + + weight_t compute_max_in_weight_sum(raft::handle_t const& handle) const; + 
weight_t compute_max_out_weight_sum(raft::handle_t const& handle) const; + private: edge_t const* offsets_{nullptr}; vertex_t const* indices_{nullptr}; diff --git a/cpp/include/matrix_partition_device.cuh b/cpp/include/matrix_partition_device.cuh index b41119e7be6..30d6540bcfe 100644 --- a/cpp/include/matrix_partition_device.cuh +++ b/cpp/include/matrix_partition_device.cuh @@ -192,7 +192,7 @@ class matrix_partition_device_t rx_counts(row_comm_size, size_t{0}); - std::vector displacements(row_comm_size, size_t{0}); - for (int i = 0; i < row_comm_size; ++i) { - rx_counts[i] = graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i); - displacements[i] = (i == 0) ? 0 : displacements[i - 1] + rx_counts[i - 1]; - } - device_allgatherv(row_comm, - vertex_value_input_first, - matrix_major_value_output_first, - rx_counts, - displacements, - handle.get_stream()); + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + + std::vector rx_counts(col_comm_size, size_t{0}); + std::vector displacements(col_comm_size, size_t{0}); + for (int i = 0; i < col_comm_size; ++i) { + rx_counts[i] = graph_view.get_vertex_partition_size(i * row_comm_size + row_comm_rank); + displacements[i] = (i == 0) ? 0 : displacements[i - 1] + rx_counts[i - 1]; } + device_allgatherv(col_comm, + vertex_value_input_first, + matrix_major_value_output_first, + rx_counts, + displacements, + handle.get_stream()); } else { assert(graph_view.get_number_of_local_vertices() == GraphViewType::is_adj_matrix_transposed ? graph_view.get_number_of_local_adj_matrix_partition_cols() @@ -101,80 +97,78 @@ void copy_to_matrix_major(raft::handle_t const& handle, using vertex_t = typename GraphViewType::vertex_type; if (GraphViewType::is_multi_gpu) { - if (graph_view.is_hypergraph_partitioned()) { - CUGRAPH_FAIL("unimplemented."); - } else { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - auto const col_comm_size = col_comm.get_size(); - - auto rx_counts = - host_scalar_allgather(row_comm, - static_cast(thrust::distance(vertex_first, vertex_last)), - handle.get_stream()); - - matrix_partition_device_t matrix_partition(graph_view, 0); - for (int i = 0; i < row_comm_size; ++i) { - rmm::device_uvector rx_vertices(row_comm_rank == i ? 
size_t{0} : rx_counts[i], - handle.get_stream()); - auto rx_tmp_buffer = allocate_dataframe_buffer< - typename std::iterator_traits::value_type>(rx_counts[i], - handle.get_stream()); - auto rx_value_first = get_dataframe_buffer_begin< - typename std::iterator_traits::value_type>(rx_tmp_buffer); + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + + auto rx_counts = + host_scalar_allgather(col_comm, + static_cast(thrust::distance(vertex_first, vertex_last)), + handle.get_stream()); + + for (int i = 0; i < col_comm_size; ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); + + rmm::device_uvector rx_vertices(col_comm_rank == i ? size_t{0} : rx_counts[i], + handle.get_stream()); + auto rx_tmp_buffer = allocate_dataframe_buffer< + typename std::iterator_traits::value_type>(rx_counts[i], + handle.get_stream()); + auto rx_value_first = get_dataframe_buffer_begin< + typename std::iterator_traits::value_type>(rx_tmp_buffer); - if (row_comm_rank == i) { - vertex_partition_device_t vertex_partition(graph_view); - auto map_first = - thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { - return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); - }); - // FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a - // permutation iterator (and directly gathers to the internal buffer) - thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - map_first, - map_first + thrust::distance(vertex_first, vertex_last), - vertex_value_input_first, - rx_value_first); - } + if (col_comm_rank == i) { + vertex_partition_device_t vertex_partition(graph_view); + auto map_first = + thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { + return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); + }); + // FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a + // permutation iterator (and directly gathers to the internal buffer) + thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + map_first, + map_first + thrust::distance(vertex_first, vertex_last), + vertex_value_input_first, + rx_value_first); + } - // FIXME: these broadcast operations can be placed between ncclGroupStart() and - // ncclGroupEnd() - device_bcast( - row_comm, vertex_first, rx_vertices.begin(), rx_counts[i], i, handle.get_stream()); - device_bcast( - row_comm, rx_value_first, rx_value_first, rx_counts[i], i, handle.get_stream()); + // FIXME: these broadcast operations can be placed between ncclGroupStart() and + // ncclGroupEnd() + device_bcast( + col_comm, vertex_first, rx_vertices.begin(), rx_counts[i], i, handle.get_stream()); + device_bcast(col_comm, rx_value_first, rx_value_first, rx_counts[i], i, handle.get_stream()); - if (row_comm_rank == i) { - auto map_first = - thrust::make_transform_iterator(vertex_first, [matrix_partition] __device__(auto v) { - return matrix_partition.get_major_offset_from_major_nocheck(v); - }); - // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and - // directly scatters from the 
internal buffer) - thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_value_first, - rx_value_first + rx_counts[i], - map_first, - matrix_major_value_output_first); - } else { - auto map_first = thrust::make_transform_iterator( - rx_vertices.begin(), [matrix_partition] __device__(auto v) { - return matrix_partition.get_major_offset_from_major_nocheck(v); - }); - // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and - // directly scatters from the internal buffer) - thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_value_first, - rx_value_first + rx_counts[i], - map_first, - matrix_major_value_output_first); - } + if (col_comm_rank == i) { + auto map_first = + thrust::make_transform_iterator(vertex_first, [matrix_partition] __device__(auto v) { + return matrix_partition.get_major_offset_from_major_nocheck(v); + }); + // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and + // directly scatters from the internal buffer) + thrust::scatter( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_value_first, + rx_value_first + rx_counts[i], + map_first, + matrix_major_value_output_first + matrix_partition.get_major_value_start_offset()); + } else { + auto map_first = thrust::make_transform_iterator( + rx_vertices.begin(), [matrix_partition] __device__(auto v) { + return matrix_partition.get_major_offset_from_major_nocheck(v); + }); + // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and + // directly scatters from the internal buffer) + thrust::scatter( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_value_first, + rx_value_first + rx_counts[i], + map_first, + matrix_major_value_output_first + matrix_partition.get_major_value_start_offset()); } } } else { @@ -199,59 +193,27 @@ void copy_to_matrix_minor(raft::handle_t const& handle, MatrixMinorValueOutputIterator matrix_minor_value_output_first) { if (GraphViewType::is_multi_gpu) { - if (graph_view.is_hypergraph_partitioned()) { - CUGRAPH_FAIL("unimplemented."); - } else { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - auto const col_comm_size = col_comm.get_size(); - - // FIXME: this P2P is unnecessary if we apply the partitioning scheme used with hypergraph - // partitioning - auto comm_src_rank = row_comm_rank * col_comm_size + col_comm_rank; - auto comm_dst_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; - // FIXME: this branch may be no longer necessary with NCCL backend - if (comm_src_rank == comm_rank) { - assert(comm_dst_rank == comm_rank); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_value_input_first, - vertex_value_input_first + graph_view.get_number_of_local_vertices(), - matrix_minor_value_output_first + - (graph_view.get_vertex_partition_first(comm_src_rank) - - graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size))); - } else { - device_sendrecv( - comm, - vertex_value_input_first, - static_cast(graph_view.get_number_of_local_vertices()), - comm_dst_rank, - 
matrix_minor_value_output_first + - (graph_view.get_vertex_partition_first(comm_src_rank) - - graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size)), - static_cast(graph_view.get_vertex_partition_size(comm_src_rank)), - comm_src_rank, - handle.get_stream()); - } - - // FIXME: these broadcast operations can be placed between ncclGroupStart() and - // ncclGroupEnd() - for (int i = 0; i < col_comm_size; ++i) { - auto offset = graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size + i) - - graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size); - auto count = graph_view.get_vertex_partition_size(row_comm_rank * col_comm_size + i); - device_bcast(col_comm, - matrix_minor_value_output_first + offset, - matrix_minor_value_output_first + offset, - count, - i, - handle.get_stream()); - } + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + + std::vector rx_counts(row_comm_size, size_t{0}); + std::vector displacements(row_comm_size, size_t{0}); + for (int i = 0; i < row_comm_size; ++i) { + rx_counts[i] = graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i); + displacements[i] = (i == 0) ? 0 : displacements[i - 1] + rx_counts[i - 1]; } + device_allgatherv(row_comm, + vertex_value_input_first, + matrix_minor_value_output_first, + rx_counts, + displacements, + handle.get_stream()); } else { assert(graph_view.get_number_of_local_vertices() == GraphViewType::is_adj_matrix_transposed ? graph_view.get_number_of_local_adj_matrix_partition_rows() @@ -277,143 +239,75 @@ void copy_to_matrix_minor(raft::handle_t const& handle, using vertex_t = typename GraphViewType::vertex_type; if (GraphViewType::is_multi_gpu) { - if (graph_view.is_hypergraph_partitioned()) { - CUGRAPH_FAIL("unimplemented."); - } else { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - auto const col_comm_size = col_comm.get_size(); - - // FIXME: this P2P is unnecessary if apply the same partitioning scheme regardless of - // hypergraph partitioning is applied or not - auto comm_src_rank = row_comm_rank * col_comm_size + col_comm_rank; - auto comm_dst_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; - size_t tx_count = thrust::distance(vertex_first, vertex_last); - size_t rx_count{}; - // FIXME: it seems like raft::isend and raft::irecv do not properly handle the destination (or - // source) == self case. Need to double check and fix this if this is indeed the case (or RAFT - // may use ncclSend/ncclRecv instead of UCX for device data). 
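The new copy_to_matrix_minor path shown earlier in this hunk replaces the point-to-point exchange with a single allgatherv over the row communicator. As a minimal sketch, and assuming a stand-in partition_sizes vector in place of the repeated graph_view.get_vertex_partition_size(...) calls, the receive counts and displacements are derived as follows (helper name is illustrative).

// Sketch only: deriving allgatherv receive counts and displacements from the
// per-rank vertex partition sizes (an exclusive prefix sum of the counts).
#include <cstddef>
#include <utility>
#include <vector>

std::pair<std::vector<std::size_t>, std::vector<std::size_t>> make_allgatherv_layout(
  std::vector<std::size_t> const& partition_sizes)
{
  std::vector<std::size_t> rx_counts(partition_sizes.size(), std::size_t{0});
  std::vector<std::size_t> displacements(partition_sizes.size(), std::size_t{0});
  for (std::size_t i = 0; i < partition_sizes.size(); ++i) {
    rx_counts[i]     = partition_sizes[i];
    displacements[i] = (i == 0) ? std::size_t{0} : displacements[i - 1] + rx_counts[i - 1];
  }
  return {rx_counts, displacements};
}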
- if (comm_src_rank == comm_rank) { - assert(comm_dst_rank == comm_rank); - rx_count = tx_count; - } else { - std::vector count_requests(2); - comm.isend(&tx_count, 1, comm_dst_rank, 0 /* tag */, count_requests.data()); - comm.irecv(&rx_count, 1, comm_src_rank, 0 /* tag */, count_requests.data() + 1); - comm.waitall(count_requests.size(), count_requests.data()); - } - - vertex_partition_device_t vertex_partition(graph_view); - rmm::device_uvector dst_vertices(rx_count, handle.get_stream()); - auto dst_tmp_buffer = allocate_dataframe_buffer< - typename std::iterator_traits::value_type>(rx_count, + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + + auto rx_counts = + host_scalar_allgather(row_comm, + static_cast(thrust::distance(vertex_first, vertex_last)), + handle.get_stream()); + + matrix_partition_device_t matrix_partition(graph_view, 0); + for (int i = 0; i < row_comm_size; ++i) { + rmm::device_uvector rx_vertices(row_comm_rank == i ? size_t{0} : rx_counts[i], + handle.get_stream()); + auto rx_tmp_buffer = allocate_dataframe_buffer< + typename std::iterator_traits::value_type>(rx_counts[i], handle.get_stream()); - auto dst_value_first = get_dataframe_buffer_begin< - typename std::iterator_traits::value_type>(dst_tmp_buffer); - if (comm_src_rank == comm_rank) { - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_first, - vertex_last, - dst_vertices.begin()); - auto map_first = - thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { - return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); - }); - thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - map_first, - map_first + thrust::distance(vertex_first, vertex_last), - vertex_value_input_first, - dst_value_first); - } else { - auto src_tmp_buffer = allocate_dataframe_buffer< - typename std::iterator_traits::value_type>(tx_count, - handle.get_stream()); - auto src_value_first = get_dataframe_buffer_begin< - typename std::iterator_traits::value_type>(src_tmp_buffer); + auto rx_value_first = get_dataframe_buffer_begin< + typename std::iterator_traits::value_type>(rx_tmp_buffer); + if (row_comm_rank == i) { + vertex_partition_device_t vertex_partition(graph_view); auto map_first = thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); }); + // FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a + // permutation iterator (and directly gathers to the internal buffer) thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), map_first, map_first + thrust::distance(vertex_first, vertex_last), vertex_value_input_first, - src_value_first); - - device_sendrecv( - comm, - vertex_first, - tx_count, - comm_dst_rank, - dst_vertices.begin(), - rx_count, - comm_src_rank, - handle.get_stream()); - - device_sendrecv(comm, - src_value_first, - tx_count, - comm_dst_rank, - dst_value_first, - rx_count, - comm_src_rank, - handle.get_stream()); + rx_value_first); } - // FIXME: now we can clear 
tx_tmp_buffer - - auto rx_counts = host_scalar_allgather(col_comm, rx_count, handle.get_stream()); - - matrix_partition_device_t matrix_partition(graph_view, 0); - for (int i = 0; i < col_comm_size; ++i) { - rmm::device_uvector rx_vertices(col_comm_rank == i ? size_t{0} : rx_counts[i], - handle.get_stream()); - auto rx_tmp_buffer = allocate_dataframe_buffer< - typename std::iterator_traits::value_type>(rx_counts[i], - handle.get_stream()); - auto rx_value_first = get_dataframe_buffer_begin< - typename std::iterator_traits::value_type>(rx_tmp_buffer); - - // FIXME: these broadcast operations can be placed between ncclGroupStart() and - // ncclGroupEnd() - device_bcast(col_comm, - dst_vertices.begin(), - rx_vertices.begin(), - rx_counts[i], - i, - handle.get_stream()); - device_bcast( - col_comm, dst_value_first, rx_value_first, rx_counts[i], i, handle.get_stream()); - - if (col_comm_rank == i) { - auto map_first = thrust::make_transform_iterator( - dst_vertices.begin(), [matrix_partition] __device__(auto v) { - return matrix_partition.get_minor_offset_from_minor_nocheck(v); - }); - - thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - dst_value_first, - dst_value_first + rx_counts[i], - map_first, - matrix_minor_value_output_first); - } else { - auto map_first = thrust::make_transform_iterator( - rx_vertices.begin(), [matrix_partition] __device__(auto v) { - return matrix_partition.get_minor_offset_from_minor_nocheck(v); - }); + // FIXME: these broadcast operations can be placed between ncclGroupStart() and + // ncclGroupEnd() + device_bcast( + row_comm, vertex_first, rx_vertices.begin(), rx_counts[i], i, handle.get_stream()); + device_bcast(row_comm, rx_value_first, rx_value_first, rx_counts[i], i, handle.get_stream()); - thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_value_first, - rx_value_first + rx_counts[i], - map_first, - matrix_minor_value_output_first); - } + if (row_comm_rank == i) { + auto map_first = + thrust::make_transform_iterator(vertex_first, [matrix_partition] __device__(auto v) { + return matrix_partition.get_minor_offset_from_minor_nocheck(v); + }); + // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and + // directly scatters from the internal buffer) + thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_value_first, + rx_value_first + rx_counts[i], + map_first, + matrix_minor_value_output_first); + } else { + auto map_first = thrust::make_transform_iterator( + rx_vertices.begin(), [matrix_partition] __device__(auto v) { + return matrix_partition.get_minor_offset_from_minor_nocheck(v); + }); + // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and + // directly scatters from the internal buffer) + thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_value_first, + rx_value_first + rx_counts[i], + map_first, + matrix_minor_value_output_first); } } } else { diff --git a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh index 3059cf95852..e6a73a874ae 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh @@ -362,16 +362,6 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); - auto loop_count = size_t{1}; - if (GraphViewType::is_multi_gpu) { - 
auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - loop_count = graph_view.is_hypergraph_partitioned() - ? graph_view.get_number_of_local_adj_matrix_partitions() - : static_cast(row_comm_size); - } - auto comm_rank = handle.comms_initialized() ? handle.get_comms().get_rank() : int{0}; - auto minor_tmp_buffer_size = (GraphViewType::is_multi_gpu && (in != GraphViewType::is_adj_matrix_transposed)) ? GraphViewType::is_adj_matrix_transposed @@ -386,10 +376,7 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, if (GraphViewType::is_multi_gpu) { auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_rank = row_comm.get_rank(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - minor_init = graph_view.is_hypergraph_partitioned() ? (row_comm_rank == 0) ? init : T{} - : (col_comm_rank == 0) ? init : T{}; + minor_init = (row_comm_rank == 0) ? init : T{}; } if (GraphViewType::is_multi_gpu) { @@ -407,24 +394,13 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, assert(minor_tmp_buffer_size == 0); } - for (size_t i = 0; i < loop_count; ++i) { - matrix_partition_device_t matrix_partition( - graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 0 : i); + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); - auto major_tmp_buffer_size = vertex_t{0}; - if (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - - major_tmp_buffer_size = - (in == GraphViewType::is_adj_matrix_transposed) - ? graph_view.is_hypergraph_partitioned() - ? matrix_partition.get_major_size() - : graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i) - : vertex_t{0}; - } + auto major_tmp_buffer_size = + GraphViewType::is_multi_gpu && (in == GraphViewType::is_adj_matrix_transposed) + ? matrix_partition.get_major_size() + : vertex_t{0}; auto major_tmp_buffer = allocate_dataframe_buffer(major_tmp_buffer_size, handle.get_stream()); auto major_buffer_first = get_dataframe_buffer_begin(major_tmp_buffer); @@ -432,12 +408,9 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, auto major_init = T{}; if (in == GraphViewType::is_adj_matrix_transposed) { if (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); - major_init = graph_view.is_hypergraph_partitioned() ? (col_comm_rank == 0) ? init : T{} - : (row_comm_rank == 0) ? init : T{}; + major_init = (col_comm_rank == 0) ? init : T{}; } else { major_init = init; } @@ -450,8 +423,7 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, auto const row_comm_size = row_comm.get_size(); auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); - comm_root_rank = graph_view.is_hypergraph_partitioned() ? 
i * row_comm_size + row_comm_rank - : col_comm_rank * row_comm_size + i; + comm_root_rank = i * row_comm_size + row_comm_rank; } if (graph_view.get_vertex_partition_size(comm_root_rank) > 0) { @@ -505,25 +477,13 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, auto const col_comm_rank = col_comm.get_rank(); auto const col_comm_size = col_comm.get_size(); - if (graph_view.is_hypergraph_partitioned()) { - device_reduce( - col_comm, - major_buffer_first, - vertex_value_output_first, - static_cast(graph_view.get_vertex_partition_size(i * row_comm_size + i)), - raft::comms::op_t::SUM, - i, - handle.get_stream()); - } else { - device_reduce(row_comm, - major_buffer_first, - vertex_value_output_first, - static_cast( - graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i)), - raft::comms::op_t::SUM, - i, - handle.get_stream()); - } + device_reduce(col_comm, + major_buffer_first, + vertex_value_output_first, + matrix_partition.get_major_size(), + raft::comms::op_t::SUM, + i, + handle.get_stream()); } } @@ -537,53 +497,17 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, auto const col_comm_rank = col_comm.get_rank(); auto const col_comm_size = col_comm.get_size(); - if (graph_view.is_hypergraph_partitioned()) { - CUGRAPH_FAIL("unimplemented."); - } else { - for (int i = 0; i < col_comm_size; ++i) { - auto offset = (graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size + i) - - graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size)); - auto size = static_cast( - graph_view.get_vertex_partition_size(row_comm_rank * col_comm_size + i)); - device_reduce(col_comm, - minor_buffer_first + offset, - minor_buffer_first + offset, - size, - raft::comms::op_t::SUM, - i, - handle.get_stream()); - } - - // FIXME: this P2P is unnecessary if we apply the partitioning scheme used with hypergraph - // partitioning - auto comm_src_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; - auto comm_dst_rank = row_comm_rank * col_comm_size + col_comm_rank; - // FIXME: this branch may no longer necessary with NCCL backend - if (comm_src_rank == comm_rank) { - assert(comm_dst_rank == comm_rank); - auto offset = - graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size + col_comm_rank) - - graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size); - auto size = static_cast( - graph_view.get_vertex_partition_size(row_comm_rank * col_comm_size + col_comm_rank)); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - minor_buffer_first + offset, - minor_buffer_first + offset + size, - vertex_value_output_first); - } else { - device_sendrecv( - comm, - minor_buffer_first + - (graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size + col_comm_rank) - - graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size)), - static_cast( - graph_view.get_vertex_partition_size(row_comm_rank * col_comm_size + col_comm_rank)), - comm_dst_rank, - vertex_value_output_first, - static_cast(graph_view.get_vertex_partition_size(comm_rank)), - comm_src_rank, - handle.get_stream()); - } + for (int i = 0; i < row_comm_size; ++i) { + auto offset = (graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size + i) - + graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size)); + device_reduce(row_comm, + minor_buffer_first + offset, + vertex_value_output_first, + static_cast( + graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i)), + 
raft::comms::op_t::SUM, + i, + handle.get_stream()); } } } diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 19a5f67c9de..22dc2041793 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -170,8 +171,8 @@ __global__ void for_all_major_for_all_nbr_low_degree( */ template ::value_type, + static_assert(std::is_same::value_type, typename GraphViewType::vertex_type>::value); + static_assert(std::is_same::value_type, + typename std::iterator_traits::value_type>::value); static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); using vertex_t = typename GraphViewType::vertex_type; @@ -206,64 +209,113 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( // 1. build a cuco::static_map object for the k, v pairs. auto kv_map_ptr = std::make_unique>( - static_cast(static_cast(thrust::distance(map_key_first, map_key_last)) / - load_factor), - invalid_vertex_id::value, - invalid_vertex_id::value); - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); - - // 2. aggregate each vertex out-going edges based on keys and transform-reduce. - - auto loop_count = size_t{1}; + size_t{0}, invalid_vertex_id::value, invalid_vertex_id::value); if (GraphViewType::is_multi_gpu) { auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); auto const row_comm_size = row_comm.get_size(); - loop_count = graph_view.is_hypergraph_partitioned() - ? 
graph_view.get_number_of_local_adj_matrix_partitions() - : static_cast(row_comm_size); + + auto map_counts = + host_scalar_allgather(row_comm, + static_cast(thrust::distance(map_key_first, map_key_last)), + handle.get_stream()); + std::vector map_displacements(row_comm_size, size_t{0}); + std::partial_sum(map_counts.begin(), map_counts.end() - 1, map_displacements.begin() + 1); + rmm::device_uvector map_keys(map_displacements.back() + map_counts.back(), + handle.get_stream()); + auto map_value_buffer = + allocate_dataframe_buffer(map_keys.size(), handle.get_stream()); + for (int i = 0; i < row_comm_size; ++i) { + device_bcast(row_comm, + map_key_first, + map_keys.begin() + map_displacements[i], + map_counts[i], + i, + handle.get_stream()); + device_bcast(row_comm, + map_value_first, + get_dataframe_buffer_begin(map_value_buffer) + map_displacements[i], + map_counts[i], + i, + handle.get_stream()); + } + // FIXME: these copies are unnecessary, better fix RAFT comm's bcast to take separate input & + // output pointers + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + map_key_first, + map_key_last, + map_keys.begin() + map_displacements[row_comm_rank]); + thrust::copy( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + map_value_first, + map_value_first + thrust::distance(map_key_first, map_key_last), + get_dataframe_buffer_begin(map_value_buffer) + map_displacements[row_comm_rank]); + + handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream + + kv_map_ptr.reset(); + + kv_map_ptr = std::make_unique>( + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast(static_cast(map_keys.size()) / load_factor), + static_cast(thrust::distance(map_key_first, map_key_last)) + 1), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple( + map_keys.begin(), get_dataframe_buffer_begin(map_value_buffer))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (map_keys.size()) { kv_map_ptr->insert(pair_first, pair_first + map_keys.size()); } + } else { + handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream + + kv_map_ptr.reset(); + + kv_map_ptr = std::make_unique>( + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast( + static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), + static_cast(thrust::distance(map_key_first, map_key_last)) + 1), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. 
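The capacity passed to cuco::static_map above is sized as std::max(n / load_factor, n + 1) so that it always exceeds the number of insertions, which works around the two cuCollections issues referenced in the comments. A minimal sketch of that sizing rule, with an illustrative helper name:

// Sketch only: the map capacity rule used in this hunk, isolated.
// Keeping capacity strictly greater than num_pairs avoids the cuCollections
// issues (#72, #73) mentioned above when load_factor would round capacity down.
#include <algorithm>
#include <cstddef>

inline std::size_t compute_kv_map_capacity(std::size_t num_pairs, double load_factor)
{
  return std::max(static_cast<std::size_t>(static_cast<double>(num_pairs) / load_factor),
                  num_pairs + std::size_t{1});
}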
+ if (thrust::distance(map_key_first, map_key_last) > 0) { + kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); + } } + // 2. aggregate each vertex out-going edges based on keys and transform-reduce. + rmm::device_uvector major_vertices(0, handle.get_stream()); auto e_op_result_buffer = allocate_dataframe_buffer(0, handle.get_stream()); - for (size_t i = 0; i < loop_count; ++i) { - matrix_partition_device_t matrix_partition( - graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 0 : i); - - int comm_root_rank = 0; - if (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - comm_root_rank = graph_view.is_hypergraph_partitioned() ? i * row_comm_size + row_comm_rank - : col_comm_rank * row_comm_size + i; - } + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); - auto num_edges = thrust::transform_reduce( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - thrust::make_counting_iterator(graph_view.get_vertex_partition_first(comm_root_rank)), - thrust::make_counting_iterator(graph_view.get_vertex_partition_last(comm_root_rank)), - [matrix_partition] __device__(auto row) { - auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); - return matrix_partition.get_local_degree(row_offset); - }, - edge_t{0}, - thrust::plus()); - - rmm::device_uvector tmp_major_vertices(num_edges, handle.get_stream()); + rmm::device_uvector tmp_major_vertices(matrix_partition.get_number_of_edges(), + handle.get_stream()); rmm::device_uvector tmp_minor_keys(tmp_major_vertices.size(), handle.get_stream()); rmm::device_uvector tmp_key_aggregated_edge_weights(tmp_major_vertices.size(), handle.get_stream()); - if (graph_view.get_vertex_partition_size(comm_root_rank) > 0) { + if (matrix_partition.get_major_size() > 0) { raft::grid_1d_thread_t update_grid( - graph_view.get_vertex_partition_size(comm_root_rank), + matrix_partition.get_major_size(), detail::copy_v_transform_reduce_key_aggregated_out_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); @@ -277,8 +329,8 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( 0, handle.get_stream()>>>( matrix_partition, - graph_view.get_vertex_partition_first(comm_root_rank), - graph_view.get_vertex_partition_last(comm_root_rank), + matrix_partition.get_major_first(), + matrix_partition.get_major_last(), adj_matrix_col_key_first, tmp_major_vertices.data(), tmp_minor_keys.data(), @@ -300,10 +352,14 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( tmp_key_aggregated_edge_weights.resize(tmp_major_vertices.size(), handle.get_stream()); if (GraphViewType::is_multi_gpu) { - auto& sub_comm = handle.get_subcomm(graph_view.is_hypergraph_partitioned() - ? 
cugraph::partition_2d::key_naming_t().col_name() - : cugraph::partition_2d::key_naming_t().row_name()); - auto const sub_comm_size = sub_comm.get_size(); + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); triplet_first = thrust::make_zip_iterator(thrust::make_tuple(tmp_major_vertices.begin(), @@ -315,11 +371,13 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( std::forward_as_tuple( std::tie(rx_major_vertices, rx_minor_keys, rx_key_aggregated_edge_weights), std::ignore) = groupby_gpuid_and_shuffle_values( - sub_comm, + col_comm, triplet_first, triplet_first + tmp_major_vertices.size(), - [key_func = detail::compute_gpu_id_from_vertex_t{sub_comm_size}] __device__( - auto val) { return key_func(thrust::get<1>(val)); }, + [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}, + row_comm_size] __device__(auto val) { + return key_func(thrust::get<1>(val)) / row_comm_size; + }, handle.get_stream()); auto pair_first = thrust::make_zip_iterator( @@ -355,56 +413,52 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( triplet_first = thrust::make_zip_iterator(thrust::make_tuple( tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); - thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - triplet_first, - triplet_first + tmp_major_vertices.size(), - tmp_e_op_result_buffer_first, - [adj_matrix_row_value_input_first, - key_aggregated_e_op, - matrix_partition, - kv_map = kv_map_ptr->get_device_view()] __device__(auto val) { - auto major = thrust::get<0>(val); - auto key = thrust::get<1>(val); - auto w = thrust::get<2>(val); - return key_aggregated_e_op( - major, - key, - w, - *(adj_matrix_row_value_input_first + - matrix_partition.get_major_offset_from_major_nocheck(major)), - kv_map.find(key)->second.load(cuda::std::memory_order_relaxed)); - }); + thrust::transform( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + triplet_first, + triplet_first + tmp_major_vertices.size(), + tmp_e_op_result_buffer_first, + [adj_matrix_row_value_input_first = + adj_matrix_row_value_input_first + matrix_partition.get_major_value_start_offset(), + key_aggregated_e_op, + matrix_partition, + kv_map = kv_map_ptr->get_device_view()] __device__(auto val) { + auto major = thrust::get<0>(val); + auto key = thrust::get<1>(val); + auto w = thrust::get<2>(val); + return key_aggregated_e_op(major, + key, + w, + *(adj_matrix_row_value_input_first + + matrix_partition.get_major_offset_from_major_nocheck(major)), + kv_map.find(key)->second.load(cuda::std::memory_order_relaxed)); + }); tmp_minor_keys.resize(0, handle.get_stream()); tmp_key_aggregated_edge_weights.resize(0, handle.get_stream()); tmp_minor_keys.shrink_to_fit(handle.get_stream()); tmp_key_aggregated_edge_weights.shrink_to_fit(handle.get_stream()); if (GraphViewType::is_multi_gpu) { - auto& sub_comm = handle.get_subcomm(graph_view.is_hypergraph_partitioned() - ? 
cugraph::partition_2d::key_naming_t().col_name() - : cugraph::partition_2d::key_naming_t().row_name()); - auto const sub_comm_rank = sub_comm.get_rank(); - auto const sub_comm_size = sub_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); // FIXME: additional optimization is possible if reduce_op is a pure function (and reduce_op // can be mapped to ncclRedOp_t). auto rx_sizes = - host_scalar_gather(sub_comm, tmp_major_vertices.size(), i, handle.get_stream()); - std::vector rx_displs( - static_cast(sub_comm_rank) == i ? sub_comm_size : int{0}, size_t{0}); - if (static_cast(sub_comm_rank) == i) { + host_scalar_gather(col_comm, tmp_major_vertices.size(), i, handle.get_stream()); + std::vector rx_displs{}; + rmm::device_uvector rx_major_vertices(0, handle.get_stream()); + if (static_cast(col_comm_rank) == i) { + rx_displs.assign(col_comm_size, size_t{0}); std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1); + rx_major_vertices.resize(rx_displs.back() + rx_sizes.back(), handle.get_stream()); } - rmm::device_uvector rx_major_vertices( - static_cast(sub_comm_rank) == i - ? std::accumulate(rx_sizes.begin(), rx_sizes.end(), size_t{0}) - : size_t{0}, - handle.get_stream()); auto rx_tmp_e_op_result_buffer = allocate_dataframe_buffer(rx_major_vertices.size(), handle.get_stream()); - device_gatherv(sub_comm, + device_gatherv(col_comm, tmp_major_vertices.data(), rx_major_vertices.data(), tmp_major_vertices.size(), @@ -412,7 +466,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( rx_displs, i, handle.get_stream()); - device_gatherv(sub_comm, + device_gatherv(col_comm, tmp_e_op_result_buffer_first, get_dataframe_buffer_begin(rx_tmp_e_op_result_buffer), tmp_major_vertices.size(), @@ -421,7 +475,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( i, handle.get_stream()); - if (static_cast(sub_comm_rank) == i) { + if (static_cast(col_comm_rank) == i) { major_vertices = std::move(rx_major_vertices); e_op_result_buffer = std::move(rx_tmp_e_op_result_buffer); } diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index e621ed91ddb..34721c75e31 100644 --- a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -179,20 +179,10 @@ transform_reduce_by_adj_matrix_row_col_key_e( using edge_t = typename GraphViewType::edge_type; using weight_t = typename GraphViewType::weight_type; - auto loop_count = size_t{1}; - if (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - loop_count = graph_view.is_hypergraph_partitioned() - ? graph_view.get_number_of_local_adj_matrix_partitions() - : static_cast(row_comm_size); - } - rmm::device_uvector keys(0, handle.get_stream()); auto value_buffer = allocate_dataframe_buffer(0, handle.get_stream()); - for (size_t i = 0; i < loop_count; ++i) { - matrix_partition_device_t matrix_partition( - graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 
0 : i); + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); int comm_root_rank = 0; if (GraphViewType::is_multi_gpu) { @@ -201,8 +191,7 @@ transform_reduce_by_adj_matrix_row_col_key_e( auto const row_comm_size = row_comm.get_size(); auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); - comm_root_rank = graph_view.is_hypergraph_partitioned() ? i * row_comm_size + row_comm_rank - : col_comm_rank * row_comm_size + i; + comm_root_rank = i * row_comm_size + row_comm_rank; } auto num_edges = thrust::transform_reduce( @@ -224,6 +213,13 @@ transform_reduce_by_adj_matrix_row_col_key_e( detail::transform_reduce_by_key_e_for_all_block_size, handle.get_device_properties().maxGridSize[0]); + auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? vertex_t{0} + : matrix_partition.get_major_value_start_offset(); + auto col_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_value_start_offset() + : vertex_t{0}; + // FIXME: This is highly inefficient for graphs with high-degree vertices. If we renumber // vertices to insure that rows within a partition are sorted by their out-degree in // decreasing order, we will apply this kernel only to low out-degree vertices. @@ -232,9 +228,10 @@ transform_reduce_by_adj_matrix_row_col_key_e( matrix_partition, graph_view.get_vertex_partition_first(comm_root_rank), graph_view.get_vertex_partition_last(comm_root_rank), - adj_matrix_row_value_input_first, - adj_matrix_col_value_input_first, - adj_matrix_row_col_key_first, + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + adj_matrix_row_col_key_first + + (adj_matrix_row_key ? row_value_input_offset : col_value_input_offset), e_op, tmp_keys.data(), get_dataframe_buffer_begin(tmp_value_buffer)); diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh index 4efd32bcac7..4d557b97a30 100644 --- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -25,12 +25,14 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -115,12 +117,10 @@ __global__ void for_all_frontier_row_for_all_nbr_low_degree( static_assert(sizeof(unsigned long long int) == sizeof(size_t)); auto buffer_idx = atomicAdd(reinterpret_cast(buffer_idx_ptr), static_cast(1)); - *(buffer_key_output_first + buffer_idx) = col; - *(buffer_payload_output_first + buffer_idx) = - remove_first_thrust_tuple_element()(e_op_result); + *(buffer_key_output_first + buffer_idx) = col; + *(buffer_payload_output_first + buffer_idx) = thrust::get<1>(e_op_result); } } - idx += gridDim.x * blockDim.x; } } @@ -155,8 +155,8 @@ size_t reduce_buffer_elements(raft::handle_t const& handle, // temporary buffer size exceeds the maximum buffer size (may be definied as percentage of the // system HBM size or a function of the maximum number of threads in the system)) // FIXME: actually, we can find how many unique keys are here by now. - // FIXME: if GraphViewType::is_multi_gpu is true, this should be executed on the GPU holding the - // vertex unless reduce_op is a pure function. 
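
For reference, a minimal host-side sketch of the sort-by-key / reduce-by-key step that reduce_buffer_elements() applies to the (key, payload) buffer filled during the push phase. It uses std:: algorithms in place of the thrust calls in the patch; all names are illustrative and the payload type is assumed to be cheaply copyable.

// Illustrative host-side analogue only; the patch performs the equivalent work
// on the GPU with thrust::sort_by_key followed by thrust::reduce_by_key.
#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

template <typename vertex_t, typename payload_t, typename ReduceOp>
std::size_t reduce_buffer_elements_host(std::vector<vertex_t>& keys,
                                        std::vector<payload_t>& payloads,
                                        ReduceOp reduce_op)
{
  // 1. bring identical keys next to each other (thrust::sort_by_key in the patch)
  std::vector<std::pair<vertex_t, payload_t>> kv(keys.size());
  for (std::size_t i = 0; i < kv.size(); ++i) { kv[i] = {keys[i], payloads[i]}; }
  std::stable_sort(
    kv.begin(), kv.end(), [](auto const& a, auto const& b) { return a.first < b.first; });

  // 2. combine the payloads of equal keys with reduce_op (thrust::reduce_by_key in the patch)
  std::vector<vertex_t> out_keys;
  std::vector<payload_t> out_payloads;
  for (auto const& p : kv) {
    if (!out_keys.empty() && out_keys.back() == p.first) {
      out_payloads.back() = reduce_op(out_payloads.back(), p.second);
    } else {
      out_keys.push_back(p.first);
      out_payloads.push_back(p.second);
    }
  }
  keys     = std::move(out_keys);
  payloads = std::move(out_payloads);
  return keys.size();  // number of unique (key, reduced payload) pairs kept
}

With a reduce_op that just keeps either operand this matches the reduce_op::any semantics the BFS change below relies on; a shortest-path style traversal would instead keep, for example, the minimum tentative distance.
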
+ // FIXME: if GraphViewType::is_multi_gpu is true, this should be executed on the GPU holding + // the vertex unless reduce_op is a pure function. rmm::device_uvector keys(num_buffer_elements, handle.get_stream()); auto value_buffer = allocate_dataframe_buffer(num_buffer_elements, handle.get_stream()); @@ -234,8 +234,7 @@ __global__ void update_frontier_and_vertex_output_values( auto v_op_result = v_op(v_val, payload); selected_bucket_idx = thrust::get<0>(v_op_result); if (selected_bucket_idx != invalid_bucket_idx) { - *(vertex_value_output_first + key_offset) = - remove_first_thrust_tuple_element()(v_op_result); + *(vertex_value_output_first + key_offset) = thrust::get<1>(v_op_result); bucket_block_local_offsets[selected_bucket_idx] = 1; } } @@ -349,25 +348,18 @@ void update_frontier_v_push_if_out_nbr( static_assert(!GraphViewType::is_adj_matrix_transposed, "GraphViewType should support the push model."); - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using payload_t = typename ReduceOp::type; // 1. fill the buffer - vertex_frontier.set_buffer_idx_value(0); - - auto loop_count = size_t{1}; - if (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - loop_count = graph_view.is_hypergraph_partitioned() - ? graph_view.get_number_of_local_adj_matrix_partitions() - : static_cast(row_comm_size); - } - - for (size_t i = 0; i < loop_count; ++i) { - matrix_partition_device_t matrix_partition( - graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 0 : i); + rmm::device_uvector keys(size_t{0}, handle.get_stream()); + auto payload_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + rmm::device_scalar buffer_idx(size_t{0}, handle.get_stream()); + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); rmm::device_uvector frontier_rows( 0, handle.get_stream()); // relevant only if GraphViewType::is_multi_gpu is true @@ -380,22 +372,18 @@ void update_frontier_v_push_if_out_nbr( auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); - auto sub_comm_rank = graph_view.is_hypergraph_partitioned() ? col_comm_rank : row_comm_rank; - frontier_size = host_scalar_bcast( - graph_view.is_hypergraph_partitioned() ? col_comm : row_comm, - (static_cast(sub_comm_rank) == i) ? thrust::distance(vertex_first, vertex_last) - : size_t{0}, - i, - handle.get_stream()); + auto sub_comm_rank = col_comm_rank; + frontier_size = host_scalar_bcast(col_comm, + (static_cast(sub_comm_rank) == i) + ? thrust::distance(vertex_first, vertex_last) + : size_t{0}, + i, + handle.get_stream()); if (static_cast(sub_comm_rank) != i) { frontier_rows.resize(frontier_size, handle.get_stream()); } - device_bcast(graph_view.is_hypergraph_partitioned() ? 
col_comm : row_comm, - vertex_first, - frontier_rows.begin(), - frontier_size, - i, - handle.get_stream()); + device_bcast( + col_comm, vertex_first, frontier_rows.begin(), frontier_size, i, handle.get_stream()); } else { frontier_size = thrust::distance(vertex_first, vertex_last); } @@ -439,10 +427,8 @@ void update_frontier_v_push_if_out_nbr( // locking. // FIXME: if i != 0, this will require costly reallocation if we don't use the new CUDA feature // to reserve address space. - vertex_frontier.resize_buffer(vertex_frontier.get_buffer_idx_value() + max_pushes); - auto buffer_first = vertex_frontier.buffer_begin(); - auto buffer_key_first = std::get<0>(buffer_first); - auto buffer_payload_first = std::get<1>(buffer_first); + keys.resize(buffer_idx.value(handle.get_stream()) + max_pushes, handle.get_stream()); + resize_dataframe_buffer(payload_buffer, keys.size(), handle.get_stream()); auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed ? vertex_t{0} @@ -467,9 +453,9 @@ void update_frontier_v_push_if_out_nbr( frontier_rows.end(), adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first, - buffer_key_first, - buffer_payload_first, - vertex_frontier.get_buffer_idx_ptr(), + keys.begin(), + get_dataframe_buffer_begin(payload_buffer), + buffer_idx.data(), e_op); } else { detail::for_all_frontier_row_for_all_nbr_low_degree<<(payload_buffer), + buffer_idx.data(), e_op); } } @@ -491,18 +477,12 @@ void update_frontier_v_push_if_out_nbr( // 2. reduce the buffer - auto num_buffer_offset = edge_t{0}; - - auto buffer_first = vertex_frontier.buffer_begin(); - auto buffer_key_first = std::get<0>(buffer_first) + num_buffer_offset; - auto buffer_payload_first = std::get<1>(buffer_first) + num_buffer_offset; - - auto num_buffer_elements = detail::reduce_buffer_elements(handle, - buffer_key_first, - buffer_payload_first, - vertex_frontier.get_buffer_idx_value(), - reduce_op); - + auto num_buffer_elements = + detail::reduce_buffer_elements(handle, + keys.begin(), + get_dataframe_buffer_begin(payload_buffer), + buffer_idx.value(handle.get_stream()), + reduce_op); if (GraphViewType::is_multi_gpu) { auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); @@ -513,12 +493,9 @@ void update_frontier_v_push_if_out_nbr( auto const col_comm_rank = col_comm.get_rank(); auto const col_comm_size = col_comm.get_size(); - std::vector h_vertex_lasts(graph_view.is_hypergraph_partitioned() ? row_comm_size - : col_comm_size); + std::vector h_vertex_lasts(row_comm_size); for (size_t i = 0; i < h_vertex_lasts.size(); ++i) { - h_vertex_lasts[i] = graph_view.get_vertex_partition_last( - graph_view.is_hypergraph_partitioned() ? 
col_comm_rank * row_comm_size + i - : row_comm_rank * col_comm_size + i); + h_vertex_lasts[i] = graph_view.get_vertex_partition_last(col_comm_rank * row_comm_size + i); } rmm::device_uvector d_vertex_lasts(h_vertex_lasts.size(), handle.get_stream()); @@ -527,8 +504,8 @@ void update_frontier_v_push_if_out_nbr( rmm::device_uvector d_tx_buffer_last_boundaries(d_vertex_lasts.size(), handle.get_stream()); thrust::lower_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - buffer_key_first, - buffer_key_first + num_buffer_elements, + keys.begin(), + keys.begin() + num_buffer_elements, d_vertex_lasts.begin(), d_vertex_lasts.end(), d_tx_buffer_last_boundaries.begin()); @@ -537,122 +514,35 @@ void update_frontier_v_push_if_out_nbr( d_tx_buffer_last_boundaries.data(), d_tx_buffer_last_boundaries.size(), handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + handle.get_stream_view().synchronize(); std::vector tx_counts(h_tx_buffer_last_boundaries.size()); std::adjacent_difference( h_tx_buffer_last_boundaries.begin(), h_tx_buffer_last_boundaries.end(), tx_counts.begin()); - std::vector rx_counts(graph_view.is_hypergraph_partitioned() ? row_comm_size - : col_comm_size); - std::vector count_requests(tx_counts.size() + rx_counts.size()); - size_t tx_self_i = std::numeric_limits::max(); - for (size_t i = 0; i < tx_counts.size(); ++i) { - auto comm_dst_rank = graph_view.is_hypergraph_partitioned() - ? col_comm_rank * row_comm_size + static_cast(i) - : row_comm_rank * col_comm_size + static_cast(i); - if (comm_dst_rank == comm_rank) { - tx_self_i = i; - // FIXME: better define request_null (similar to MPI_REQUEST_NULL) under raft::comms - count_requests[i] = std::numeric_limits::max(); - } else { - comm.isend(&tx_counts[i], 1, comm_dst_rank, 0 /* tag */, count_requests.data() + i); - } - } - for (size_t i = 0; i < rx_counts.size(); ++i) { - auto comm_src_rank = graph_view.is_hypergraph_partitioned() - ? col_comm_rank * row_comm_size + static_cast(i) - : static_cast(i) * row_comm_size + comm_rank / col_comm_size; - if (comm_src_rank == comm_rank) { - assert(tx_self_i != std::numeric_limits::max()); - rx_counts[i] = tx_counts[tx_self_i]; - // FIXME: better define request_null (similar to MPI_REQUEST_NULL) under raft::comms - count_requests[tx_counts.size() + i] = std::numeric_limits::max(); - } else { - comm.irecv(&rx_counts[i], - 1, - comm_src_rank, - 0 /* tag */, - count_requests.data() + tx_counts.size() + i); - } - } - // FIXME: better define request_null (similar to MPI_REQUEST_NULL) under raft::comms, if - // raft::comms::wait immediately returns on seeing request_null, this remove is unnecessary - count_requests.erase(std::remove(count_requests.begin(), - count_requests.end(), - std::numeric_limits::max()), - count_requests.end()); - comm.waitall(count_requests.size(), count_requests.data()); - - std::vector tx_offsets(tx_counts.size() + 1, edge_t{0}); - std::partial_sum(tx_counts.begin(), tx_counts.end(), tx_offsets.begin() + 1); - std::vector rx_offsets(rx_counts.size() + 1, edge_t{0}); - std::partial_sum(rx_counts.begin(), rx_counts.end(), rx_offsets.begin() + 1); - - // FIXME: this will require costly reallocation if we don't use the new CUDA feature to reserve - // address space. 
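
The replacement code above derives the per-rank send counts from the already reduced and key-sorted buffer: since keys are grouped by destination vertex partition, a lower_bound against each partition's exclusive upper bound (vertex_last) gives the end offset of that rank's slice, and adjacent_difference turns those offsets into the counts fed to shuffle_values() in place of the hand-rolled isend/irecv exchange removed below. A small host-side illustration with assumed toy values (std:: algorithms stand in for the thrust::lower_bound call):

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <vector>

// Given keys sorted by destination vertex and each rank's exclusive partition
// upper bound ("vertex_last"), compute how many buffer elements go to each
// rank: lower_bound yields the end offset of each rank's slice,
// adjacent_difference turns end offsets into per-rank counts.
std::vector<std::size_t> compute_tx_counts(std::vector<int> const& sorted_keys,
                                           std::vector<int> const& vertex_lasts)
{
  std::vector<std::size_t> last_boundaries(vertex_lasts.size());
  for (std::size_t i = 0; i < vertex_lasts.size(); ++i) {
    last_boundaries[i] = static_cast<std::size_t>(
      std::lower_bound(sorted_keys.begin(), sorted_keys.end(), vertex_lasts[i]) -
      sorted_keys.begin());
  }
  std::vector<std::size_t> tx_counts(last_boundaries.size());
  std::adjacent_difference(last_boundaries.begin(), last_boundaries.end(), tx_counts.begin());
  return tx_counts;
}

// Example: vertices [0,4) on rank 0, [4,9) on rank 1, [9,12) on rank 2.
// sorted_keys = {1, 3, 3, 7, 10} and vertex_lasts = {4, 9, 12} -> tx_counts = {3, 1, 1}.
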
- // FIXME: std::max(actual size, 1) as ncclRecv currently hangs if recvuff is nullptr even if - // count is 0 - vertex_frontier.resize_buffer(std::max(num_buffer_elements + rx_offsets.back(), size_t(1))); - - auto buffer_first = vertex_frontier.buffer_begin(); - auto buffer_key_first = std::get<0>(buffer_first) + num_buffer_offset; - auto buffer_payload_first = std::get<1>(buffer_first) + num_buffer_offset; - - std::vector tx_dst_ranks(tx_counts.size()); - std::vector rx_src_ranks(rx_counts.size()); - for (size_t i = 0; i < tx_dst_ranks.size(); ++i) { - tx_dst_ranks[i] = graph_view.is_hypergraph_partitioned() - ? col_comm_rank * row_comm_size + static_cast(i) - : row_comm_rank * col_comm_size + static_cast(i); - } - for (size_t i = 0; i < rx_src_ranks.size(); ++i) { - rx_src_ranks[i] = graph_view.is_hypergraph_partitioned() - ? col_comm_rank * row_comm_size + static_cast(i) - : static_cast(i) * row_comm_size + comm_rank / col_comm_size; - } - - device_multicast_sendrecv( - comm, - buffer_key_first, - tx_counts, - tx_offsets, - tx_dst_ranks, - buffer_key_first + num_buffer_elements, - rx_counts, - rx_offsets, - rx_src_ranks, - handle.get_stream()); - device_multicast_sendrecv( - comm, - buffer_payload_first, - tx_counts, - tx_offsets, - tx_dst_ranks, - buffer_payload_first + num_buffer_elements, - rx_counts, - rx_offsets, - rx_src_ranks, - handle.get_stream()); - - // FIXME: this does not exploit the fact that each segment is sorted. Lost performance - // optimization opportunities. - // FIXME: we can use [vertex_frontier.buffer_begin(), vertex_frontier.buffer_begin() + - // num_buffer_elements) as temporary buffer inside reduce_buffer_elements(). - num_buffer_offset = num_buffer_elements; - num_buffer_elements = detail::reduce_buffer_elements(handle, - buffer_key_first + num_buffer_elements, - buffer_payload_first + num_buffer_elements, - rx_offsets.back(), - reduce_op); + rmm::device_uvector rx_keys(size_t{0}, handle.get_stream()); + std::tie(rx_keys, std::ignore) = + shuffle_values(row_comm, keys.begin(), tx_counts, handle.get_stream()); + keys = std::move(rx_keys); + + auto rx_payload_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + std::tie(rx_payload_buffer, std::ignore) = + shuffle_values(row_comm, + get_dataframe_buffer_begin(payload_buffer), + tx_counts, + handle.get_stream()); + payload_buffer = std::move(rx_payload_buffer); + + num_buffer_elements = + detail::reduce_buffer_elements(handle, + keys.begin(), + get_dataframe_buffer_begin(payload_buffer), + keys.size(), + reduce_op); } // 3. 
update vertex properties if (num_buffer_elements > 0) { - auto buffer_first = vertex_frontier.buffer_begin(); - auto buffer_key_first = std::get<0>(buffer_first) + num_buffer_offset; - auto buffer_payload_first = std::get<1>(buffer_first) + num_buffer_offset; - raft::grid_1d_thread_t update_grid(num_buffer_elements, detail::update_frontier_v_push_if_out_nbr_update_block_size, handle.get_device_properties().maxGridSize[0]); @@ -666,8 +556,8 @@ void update_frontier_v_push_if_out_nbr( detail::update_frontier_and_vertex_output_values <<>>( vertex_partition, - buffer_key_first, - buffer_payload_first, + keys.begin(), + get_dataframe_buffer_begin(payload_buffer), num_buffer_elements, vertex_value_input_first, vertex_value_output_first, @@ -690,21 +580,5 @@ void update_frontier_v_push_if_out_nbr( } } -/* - -FIXME: - -iterating over lower triangular (or upper triangular) : triangle counting -LRB might be necessary if the cost of processing an edge (i, j) is a function of degree(i) and -degree(j) : triangle counting -push-pull switching support (e.g. DOBFS), in this case, we need both -CSR & CSC (trade-off execution time vs memory requirement, unless graph is symmetric) -if graph is symmetric, there will be additional optimization opportunities (e.g. in-degree == -out-degree) For BFS, sending a bit vector (for the entire set of dest vertices per partitoin may -work better we can use thrust::set_intersection for triangle counting think about adding thrust -wrappers for reduction functions. Can I pass nullptr for dummy -instead of thrust::make_counting_iterator(0)? -*/ - } // namespace experimental } // namespace cugraph diff --git a/cpp/include/patterns/vertex_frontier.cuh b/cpp/include/patterns/vertex_frontier.cuh index c11142d3cf7..375ec097850 100644 --- a/cpp/include/patterns/vertex_frontier.cuh +++ b/cpp/include/patterns/vertex_frontier.cuh @@ -48,26 +48,6 @@ inline size_t round_up(size_t number_to_round, size_t modulus) return ((number_to_round + (modulus - 1)) / modulus) * modulus; } -template -auto make_buffer_zip_iterator_impl(std::vector& buffer_ptrs, - size_t offset, - std::index_sequence) -{ - auto key_ptr = reinterpret_cast(buffer_ptrs[0]) + offset; - auto payload_it = thrust::make_zip_iterator( - thrust::make_tuple(reinterpret_cast::type*>( - buffer_ptrs[1 + Is])...)); - return std::make_tuple(key_ptr, payload_it); -} - -template -auto make_buffer_zip_iterator(std::vector& buffer_ptrs, size_t offset) -{ - size_t constexpr tuple_size = thrust::tuple_size::value; - return make_buffer_zip_iterator_impl( - buffer_ptrs, offset, std::make_index_sequence()); -} - template __global__ void move_and_invalidate_if(RowIterator row_first, RowIterator row_last, @@ -199,10 +179,7 @@ class Bucket { size_t size_{0}; }; -template +template class VertexFrontier { public: static size_t constexpr kNumBuckets = num_buckets; @@ -211,9 +188,7 @@ class VertexFrontier { VertexFrontier(raft::handle_t const& handle, std::vector bucket_capacities) : handle_ptr_(&handle), tmp_bucket_ptrs_(num_buckets, handle.get_stream()), - tmp_bucket_sizes_(num_buckets, handle.get_stream()), - buffer_ptrs_(kReduceInputTupleSize + 1 /* to store destination column number */, nullptr), - buffer_idx_(0, handle_ptr_->get_stream()) + tmp_bucket_sizes_(num_buckets, handle.get_stream()) { CUGRAPH_EXPECTS(bucket_capacities.size() == num_buckets, "invalid input argument bucket_capacities (size mismatch)"); @@ -228,7 +203,6 @@ class VertexFrontier { for (size_t i = 0; i < num_buckets; ++i) { buckets_.emplace_back(handle, 
bucket_capacities[i]); } - buffer_.set_stream(handle_ptr_->get_stream()); } Bucket& get_bucket(size_t bucket_idx) { return buckets_[bucket_idx]; } @@ -311,90 +285,11 @@ class VertexFrontier { return std::make_tuple(tmp_bucket_ptrs_.data(), tmp_bucket_sizes_.data()); } - void resize_buffer(size_t size) - { - // FIXME: rmm::device_buffer resize incurs copy if memory is reallocated, which is unnecessary - // in this case. - buffer_.resize(compute_aggregate_buffer_size_in_bytes(size), handle_ptr_->get_stream()); - if (size > buffer_capacity_) { - buffer_capacity_ = size; - update_buffer_ptrs(); - } - buffer_size_ = size; - } - - void clear_buffer() { resize_buffer(0); } - - void shrink_to_fit_buffer() - { - if (buffer_size_ != buffer_capacity_) { - // FIXME: rmm::device_buffer shrink_to_fit incurs copy if memory is reallocated, which is - // unnecessary in this case. - buffer_.shrink_to_fit(handle_ptr_->get_stream()); - update_buffer_ptrs(); - buffer_capacity_ = buffer_size_; - } - } - - auto buffer_begin() - { - return detail::make_buffer_zip_iterator(buffer_ptrs_, 0); - } - - auto buffer_end() - { - return detail::make_buffer_zip_iterator(buffer_ptrs_, - buffer_size_); - } - - auto get_buffer_idx_ptr() { return buffer_idx_.data(); } - - size_t get_buffer_idx_value() { return buffer_idx_.value(handle_ptr_->get_stream()); } - - void set_buffer_idx_value(size_t value) - { - buffer_idx_.set_value(value, handle_ptr_->get_stream()); - } - private: - static size_t constexpr kReduceInputTupleSize = thrust::tuple_size::value; - static size_t constexpr kBufferAlignment = 128; - raft::handle_t const* handle_ptr_{nullptr}; std::vector> buckets_{}; rmm::device_uvector tmp_bucket_ptrs_; rmm::device_uvector tmp_bucket_sizes_; - - std::array tuple_element_sizes_ = - compute_thrust_tuple_element_sizes()(); - std::vector buffer_ptrs_{}; - rmm::device_buffer buffer_{}; - size_t buffer_size_{0}; - size_t buffer_capacity_{0}; - rmm::device_scalar buffer_idx_{}; - - // FIXME: better pick between this apporach or the approach used in allocate_comm_buffer - size_t compute_aggregate_buffer_size_in_bytes(size_t size) - { - size_t aggregate_buffer_size_in_bytes = - detail::round_up(sizeof(vertex_t) * size, kBufferAlignment); - for (size_t i = 0; i < kReduceInputTupleSize; ++i) { - aggregate_buffer_size_in_bytes += - detail::round_up(tuple_element_sizes_[i] * size, kBufferAlignment); - } - return aggregate_buffer_size_in_bytes; - } - - void update_buffer_ptrs() - { - uintptr_t ptr = reinterpret_cast(buffer_.data()); - buffer_ptrs_[0] = reinterpret_cast(ptr); - ptr += detail::round_up(sizeof(vertex_t) * buffer_capacity_, kBufferAlignment); - for (size_t i = 0; i < kReduceInputTupleSize; ++i) { - buffer_ptrs_[1 + i] = reinterpret_cast(ptr); - ptr += detail::round_up(tuple_element_sizes_[i] * buffer_capacity_, kBufferAlignment); - } - } }; } // namespace experimental diff --git a/cpp/include/utilities/collect_comm.cuh b/cpp/include/utilities/collect_comm.cuh index 8d2227c0f60..481717d7c38 100644 --- a/cpp/include/utilities/collect_comm.cuh +++ b/cpp/include/utilities/collect_comm.cuh @@ -58,13 +58,18 @@ collect_values_for_keys(raft::comms::comms_t const &comm, double constexpr load_factor = 0.7; // FIXME: we may compare the performance & memory footprint of this hash based approach vs binary - // search based approach + // search based approach (especially when thrust::distance(collect_key_first, collect_key_last) << + // thrust::distance(map_key_first, map_key_last) // 1. 
build a cuco::static_map object for the map k, v pairs. auto kv_map_ptr = std::make_unique>( - static_cast(static_cast(thrust::distance(map_key_first, map_key_last)) / - load_factor), + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast( + static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), + static_cast(thrust::distance(map_key_first, map_key_last)) + 1), invalid_vertex_id::value, invalid_vertex_id::value); { @@ -73,7 +78,11 @@ collect_values_for_keys(raft::comms::comms_t const &comm, [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (thrust::distance(map_key_first, map_key_last) > 0) { + kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); + } } // 2. collect values for the unique keys in [collect_key_first, collect_key_last) @@ -82,9 +91,6 @@ collect_values_for_keys(raft::comms::comms_t const &comm, stream); thrust::copy( rmm::exec_policy(stream)->on(stream), collect_key_first, collect_key_last, unique_keys.begin()); - // FIXME: sort and unique are unnecessary if the keys in [collect_key_first, collect_key_last) are - // already unique, if this cost becomes a performance bottlenec, we may add - // collect_values_for_unique_keys in the future thrust::sort(rmm::exec_policy(stream)->on(stream), unique_keys.begin(), unique_keys.end()); unique_keys.resize( thrust::distance( @@ -107,8 +113,12 @@ collect_values_for_keys(raft::comms::comms_t const &comm, CUDA_TRY(cudaStreamSynchronize(stream)); // cuco::static_map currently does not take stream - kv_map_ptr->find( - rx_unique_keys.begin(), rx_unique_keys.end(), values_for_rx_unique_keys.begin()); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (rx_unique_keys.size() > 0) { + kv_map_ptr->find( + rx_unique_keys.begin(), rx_unique_keys.end(), values_for_rx_unique_keys.begin()); + } rmm::device_uvector rx_values_for_unique_keys(0, stream); std::tie(rx_values_for_unique_keys, std::ignore) = @@ -125,7 +135,11 @@ collect_values_for_keys(raft::comms::comms_t const &comm, kv_map_ptr.reset(); kv_map_ptr = std::make_unique>( - static_cast(static_cast(unique_keys.size()) / load_factor), + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast(static_cast(unique_keys.size()) / load_factor), + unique_keys.size() + 1), invalid_vertex_id::value, invalid_vertex_id::value); { @@ -136,15 +150,154 @@ collect_values_for_keys(raft::comms::comms_t const &comm, return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (unique_keys.size() > 0) { kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); } } // 4. 
find values for [collect_key_first, collect_key_last) auto value_buffer = allocate_dataframe_buffer( thrust::distance(collect_key_first, collect_key_last), stream); - kv_map_ptr->find( - collect_key_first, collect_key_last, get_dataframe_buffer_begin(value_buffer)); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (thrust::distance(collect_key_first, collect_key_last) > 0) { + kv_map_ptr->find( + collect_key_first, collect_key_last, get_dataframe_buffer_begin(value_buffer)); + } + + return value_buffer; +} + +// for key = [map_key_first, map_key_last), key_to_gpu_id_op(key) should be coincide with +// comm.get_rank() +template +decltype(allocate_dataframe_buffer::value_type>( + 0, cudaStream_t{nullptr})) +collect_values_for_unique_keys(raft::comms::comms_t const &comm, + VertexIterator0 map_key_first, + VertexIterator0 map_key_last, + ValueIterator map_value_first, + VertexIterator1 collect_unique_key_first, + VertexIterator1 collect_unique_key_last, + KeyToGPUIdOp key_to_gpu_id_op, + cudaStream_t stream) +{ + using vertex_t = typename std::iterator_traits::value_type; + static_assert( + std::is_same::value_type, vertex_t>::value); + using value_t = typename std::iterator_traits::value_type; + + double constexpr load_factor = 0.7; + + // FIXME: we may compare the performance & memory footprint of this hash based approach vs binary + // search based approach (especially when thrust::distance(collect_unique_key_first, + // collect_unique_key_last) << thrust::distance(map_key_first, map_key_last) + + // 1. build a cuco::static_map object for the map k, v pairs. + + auto kv_map_ptr = std::make_unique>( + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast( + static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), + static_cast(thrust::distance(map_key_first, map_key_last)) + 1), + invalid_vertex_id::value, + invalid_vertex_id::value); + { + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (thrust::distance(map_key_first, map_key_last)) { + kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); + } + } + + // 2. 
collect values for the unique keys in [collect_unique_key_first, collect_unique_key_last) + + rmm::device_uvector unique_keys( + thrust::distance(collect_unique_key_first, collect_unique_key_last), stream); + thrust::copy(rmm::exec_policy(stream)->on(stream), + collect_unique_key_first, + collect_unique_key_last, + unique_keys.begin()); + + rmm::device_uvector values_for_unique_keys(0, stream); + { + rmm::device_uvector rx_unique_keys(0, stream); + std::vector rx_value_counts{}; + std::tie(rx_unique_keys, rx_value_counts) = groupby_gpuid_and_shuffle_values( + comm, + unique_keys.begin(), + unique_keys.end(), + [key_to_gpu_id_op] __device__(auto val) { return key_to_gpu_id_op(val); }, + stream); + + rmm::device_uvector values_for_rx_unique_keys(rx_unique_keys.size(), stream); + + CUDA_TRY(cudaStreamSynchronize(stream)); // cuco::static_map currently does not take stream + + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (rx_unique_keys.size() > 0) { + kv_map_ptr->find( + rx_unique_keys.begin(), rx_unique_keys.end(), values_for_rx_unique_keys.begin()); + } + + rmm::device_uvector rx_values_for_unique_keys(0, stream); + std::tie(rx_values_for_unique_keys, std::ignore) = + shuffle_values(comm, values_for_rx_unique_keys.begin(), rx_value_counts, stream); + + values_for_unique_keys = std::move(rx_values_for_unique_keys); + } + + // 3. re-build a cuco::static_map object for the k, v pairs in unique_keys, + // values_for_unique_keys. + + CUDA_TRY(cudaStreamSynchronize(stream)); // cuco::static_map currently does not take stream + + kv_map_ptr.reset(); + + kv_map_ptr = std::make_unique>( + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast(static_cast(unique_keys.size()) / load_factor), + unique_keys.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value); + { + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(unique_keys.begin(), values_for_unique_keys.begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (unique_keys.size() > 0) { kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); } + } + + // 4. find values for [collect_unique_key_first, collect_unique_key_last) + + auto value_buffer = allocate_dataframe_buffer( + thrust::distance(collect_unique_key_first, collect_unique_key_last), stream); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. 
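
To summarize the data flow of the collect_values_for_unique_keys() helper added here: each requested key is routed to the rank that owns it (key_to_gpu_id_op via groupby_gpuid_and_shuffle_values), looked up in that rank's local key-to-value map (a cuco::static_map on the device), and the result is shuffled back for the final find. A single-process sketch with illustrative names, using std::unordered_map in place of cuco::static_map and a modulo rule standing in for key_to_gpu_id_op:

// Single-process illustration only (assumed names); the patch shuffles keys
// between GPUs with groupby_gpuid_and_shuffle_values()/shuffle_values() and
// uses cuco::static_map for the per-GPU key -> value lookups.
#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

using vertex_t = int64_t;
using value_t  = double;

std::vector<value_t> collect_values_for_unique_keys_host(
  // per_rank_map[r] holds the (key, value) pairs owned by rank r; here rank r
  // owns a key iff key % num_ranks == r.
  std::vector<std::unordered_map<vertex_t, value_t>> const& per_rank_map,
  std::vector<vertex_t> const& collect_unique_keys)
{
  auto const num_ranks = static_cast<vertex_t>(per_rank_map.size());
  std::vector<value_t> values(collect_unique_keys.size());
  for (std::size_t i = 0; i < collect_unique_keys.size(); ++i) {
    auto key   = collect_unique_keys[i];
    auto owner = static_cast<std::size_t>(key % num_ranks);  // stand-in for key_to_gpu_id_op
    // Distributed version: shuffle key to `owner`, static_map::find() there,
    // then shuffle the found value back to the requesting rank.
    values[i] = per_rank_map[owner].at(key);
  }
  return values;
}
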
+ if (thrust::distance(collect_unique_key_first, collect_unique_key_last)) { + kv_map_ptr->find(collect_unique_key_first, + collect_unique_key_last, + get_dataframe_buffer_begin(value_buffer)); + } return value_buffer; } diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index a58331d465a..d8c476760f0 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -93,7 +93,7 @@ struct graph_container_t { void* weights; void* vertex_partition_offsets; - size_t num_partition_edges; + size_t num_local_edges; size_t num_global_vertices; size_t num_global_edges; numberTypeEnum vertexType; @@ -103,7 +103,6 @@ struct graph_container_t { bool is_multi_gpu; bool sorted_by_degree; bool do_expensive_check; - bool hypergraph_partitioned; int row_comm_size; int col_comm_size; int row_comm_rank; @@ -147,7 +146,7 @@ struct cy_multi_edgelists_t { // replacement for std::tuple<,,>, since std::tuple is not // supported in cython // -template +template struct major_minor_weights_t { explicit major_minor_weights_t(raft::handle_t const& handle) : shuffled_major_vertices_(0, handle.get_stream()), @@ -155,12 +154,15 @@ struct major_minor_weights_t { shuffled_weights_(0, handle.get_stream()) { } + rmm::device_uvector& get_major(void) { return shuffled_major_vertices_; } rmm::device_uvector& get_minor(void) { return shuffled_minor_vertices_; } rmm::device_uvector& get_weights(void) { return shuffled_weights_; } + std::vector& get_edge_counts(void) { return edge_counts_; } + std::pair, size_t> get_major_wrap( void) // const: triggers errors in Cython autogen-ed C++ { @@ -180,10 +182,16 @@ struct major_minor_weights_t { sizeof(weight_t)); } + std::unique_ptr> get_edge_counts_wrap(void) // const + { + return std::make_unique>(edge_counts_); + } + private: rmm::device_uvector shuffled_major_vertices_; rmm::device_uvector shuffled_minor_vertices_; rmm::device_uvector shuffled_weights_; + std::vector edge_counts_{}; }; // aggregate for random_walks() return type @@ -353,6 +361,9 @@ struct renum_quad_t { // The number of vertices and edges respectively in the graph represented by // the above arrays. 
// +// bool is_weighted +// true if the resulting graph object should store edge weights +// // bool transposed // true if the resulting graph object should store a transposed adjacency // matrix @@ -369,10 +380,11 @@ void populate_graph_container(graph_container_t& graph_container, numberTypeEnum vertexType, numberTypeEnum edgeType, numberTypeEnum weightType, - size_t num_partition_edges, + size_t num_local_edges, size_t num_global_vertices, size_t num_global_edges, bool sorted_by_degree, + bool is_weighted, bool transposed, bool multi_gpu); @@ -470,14 +482,13 @@ call_random_walks(raft::handle_t const& handle, // wrapper for shuffling: // template -std::unique_ptr> call_shuffle( +std::unique_ptr> call_shuffle( raft::handle_t const& handle, vertex_t* edgelist_major_vertices, // [IN / OUT]: groupby_gpuid_and_shuffle_values() sorts in-place vertex_t* edgelist_minor_vertices, // [IN / OUT] weight_t* edgelist_weights, // [IN / OUT] - edge_t num_edgelist_edges, - bool is_hypergraph_partitioned); // = false + edge_t num_edgelist_edges); // Wrapper for calling renumber_edeglist() inplace: // @@ -486,8 +497,7 @@ std::unique_ptr> call_renumber( raft::handle_t const& handle, vertex_t* shuffled_edgelist_major_vertices /* [INOUT] */, vertex_t* shuffled_edgelist_minor_vertices /* [INOUT] */, - edge_t num_edgelist_edges, - bool is_hypergraph_partitioned, + std::vector const& edge_counts, bool do_expensive_check, bool multi_gpu); diff --git a/cpp/include/utilities/dataframe_buffer.cuh b/cpp/include/utilities/dataframe_buffer.cuh index 06352b8e217..e59b12f2a80 100644 --- a/cpp/include/utilities/dataframe_buffer.cuh +++ b/cpp/include/utilities/dataframe_buffer.cuh @@ -47,21 +47,19 @@ auto allocate_dataframe_buffer_tuple_impl(std::index_sequence, } template -void resize_dataframe_buffer_tuple_element_impl(BufferType& buffer, - size_t new_buffer_size, - cudaStream_t stream) -{ - std::get(buffer).resize(new_buffer_size, stream); - resize_dataframe_buffer_tuple_element_impl( - buffer, new_buffer_size, stream); -} +struct resize_dataframe_buffer_tuple_iterator_element_impl { + void run(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) + { + std::get(buffer).resize(new_buffer_size, stream); + resize_dataframe_buffer_tuple_iterator_element_impl().run( + buffer, new_buffer_size, stream); + } +}; template -void resize_dataframe_buffer_tuple_impl(BufferType& buffer, - size_t new_buffer_size, - cudaStream_t stream) -{ -} +struct resize_dataframe_buffer_tuple_iterator_element_impl { + void run(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) {} +}; template auto get_dataframe_buffer_begin_tuple_element_impl(BufferType& buffer) @@ -108,8 +106,9 @@ template ::value; - detail::resize_dataframe_buffer_tuple_impl( - buffer, new_buffer_size, stream); + detail:: + resize_dataframe_buffer_tuple_iterator_element_impl() + .run(buffer, new_buffer_size, stream); } template +std::enable_if_t::value, void> +device_allreduce_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + cudaStream_t stream) +{ + // no-op +} + +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_allreduce_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + cudaStream_t stream) +{ + static_assert(std::is_same::value_type, + typename std::iterator_traits::value_type>::value); + comm.allreduce(iter_to_raw_ptr(input_first), 
iter_to_raw_ptr(output_first), count, op, stream); +} + +template +struct device_allreduce_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + cudaStream_t stream) const + { + device_allreduce_impl(comm, + thrust::get(input_first.get_iterator_tuple()), + thrust::get(output_first.get_iterator_tuple()), + count, + op, + stream); + device_allreduce_tuple_iterator_element_impl( + comm, input_first, output_first, count, op, stream); + } +}; + +template +struct device_allreduce_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + cudaStream_t stream) const + { + } +}; + template std::enable_if_t::value, void> device_reduce_impl(raft::comms::comms_t const& comm, @@ -856,6 +916,46 @@ device_bcast(raft::comms::comms_t const& comm, comm, input_first, output_first, count, root, stream); } +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_allreduce(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + cudaStream_t stream) +{ + detail::device_allreduce_impl(comm, input_first, output_first, count, op, stream); +} + +template +std::enable_if_t< + is_thrust_tuple_of_arithmetic::value_type>::value && + is_thrust_tuple::value_type>::value, + void> +device_allreduce(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + cudaStream_t stream) +{ + static_assert( + thrust::tuple_size::value_type>::value == + thrust::tuple_size::value_type>::value); + + size_t constexpr tuple_size = + thrust::tuple_size::value_type>::value; + + detail::device_allreduce_tuple_iterator_element_impl( + comm, input_first, output_first, count, op, stream); +} + template std::enable_if_t< std::is_arithmetic::value_type>::value, diff --git a/cpp/include/utilities/shuffle_comm.cuh b/cpp/include/utilities/shuffle_comm.cuh index 8c363c9a346..b318009d9bf 100644 --- a/cpp/include/utilities/shuffle_comm.cuh +++ b/cpp/include/utilities/shuffle_comm.cuh @@ -22,6 +22,12 @@ #include #include +#include +#include +#include +#include +#include + #include #include #include @@ -31,89 +37,6 @@ namespace experimental { namespace detail { -template -rmm::device_uvector sort_and_count(raft::comms::comms_t const &comm, - ValueIterator tx_value_first /* [INOUT */, - ValueIterator tx_value_last /* [INOUT */, - ValueToGPUIdOp value_to_gpu_id_op, - cudaStream_t stream) -{ - auto const comm_size = comm.get_size(); - - thrust::sort(rmm::exec_policy(stream)->on(stream), - tx_value_first, - tx_value_last, - [value_to_gpu_id_op] __device__(auto lhs, auto rhs) { - return value_to_gpu_id_op(lhs) < value_to_gpu_id_op(rhs); - }); - - auto gpu_id_first = thrust::make_transform_iterator( - tx_value_first, - [value_to_gpu_id_op] __device__(auto value) { return value_to_gpu_id_op(value); }); - rmm::device_uvector d_tx_dst_ranks(comm_size, stream); - rmm::device_uvector d_tx_value_counts(comm_size, stream); - auto last = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), - gpu_id_first, - gpu_id_first + thrust::distance(tx_value_first, tx_value_last), - thrust::make_constant_iterator(size_t{1}), - d_tx_dst_ranks.begin(), - d_tx_value_counts.begin()); - if (thrust::distance(d_tx_value_counts.begin(), thrust::get<1>(last)) < 
comm_size) { - rmm::device_uvector d_counts(comm_size, stream); - thrust::fill(rmm::exec_policy(stream)->on(stream), d_counts.begin(), d_counts.end(), size_t{0}); - thrust::scatter(rmm::exec_policy(stream)->on(stream), - d_tx_value_counts.begin(), - thrust::get<1>(last), - d_tx_dst_ranks.begin(), - d_counts.begin()); - d_tx_value_counts = std::move(d_counts); - } - - return d_tx_value_counts; -} - -template -rmm::device_uvector sort_and_count(raft::comms::comms_t const &comm, - VertexIterator tx_key_first /* [INOUT */, - VertexIterator tx_key_last /* [INOUT */, - ValueIterator tx_value_first /* [INOUT */, - KeyToGPUIdOp key_to_gpu_id_op, - cudaStream_t stream) -{ - auto const comm_size = comm.get_size(); - - thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), - tx_key_first, - tx_key_last, - tx_value_first, - [key_to_gpu_id_op] __device__(auto lhs, auto rhs) { - return key_to_gpu_id_op(lhs) < key_to_gpu_id_op(rhs); - }); - - auto gpu_id_first = thrust::make_transform_iterator( - tx_key_first, [key_to_gpu_id_op] __device__(auto key) { return key_to_gpu_id_op(key); }); - rmm::device_uvector d_tx_dst_ranks(comm_size, stream); - rmm::device_uvector d_tx_value_counts(comm_size, stream); - auto last = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), - gpu_id_first, - gpu_id_first + thrust::distance(tx_key_first, tx_key_last), - thrust::make_constant_iterator(size_t{1}), - d_tx_dst_ranks.begin(), - d_tx_value_counts.begin()); - if (thrust::distance(d_tx_value_counts.begin(), thrust::get<1>(last)) < comm_size) { - rmm::device_uvector d_counts(comm_size, stream); - thrust::fill(rmm::exec_policy(stream)->on(stream), d_counts.begin(), d_counts.end(), size_t{0}); - thrust::scatter(rmm::exec_policy(stream)->on(stream), - d_tx_value_counts.begin(), - thrust::get<1>(last), - d_tx_dst_ranks.begin(), - d_counts.begin()); - d_tx_value_counts = std::move(d_counts); - } - - return d_tx_value_counts; -} - // inline to suppress a complaint about ODR violation inline std::tuple, std::vector, @@ -187,6 +110,86 @@ compute_tx_rx_counts_offsets_ranks(raft::comms::comms_t const &comm, } // namespace detail +template +rmm::device_uvector groupby_and_count(ValueIterator tx_value_first /* [INOUT */, + ValueIterator tx_value_last /* [INOUT */, + ValueToGPUIdOp value_to_group_id_op, + int num_groups, + cudaStream_t stream) +{ + thrust::sort(rmm::exec_policy(stream)->on(stream), + tx_value_first, + tx_value_last, + [value_to_group_id_op] __device__(auto lhs, auto rhs) { + return value_to_group_id_op(lhs) < value_to_group_id_op(rhs); + }); + + auto group_id_first = thrust::make_transform_iterator( + tx_value_first, + [value_to_group_id_op] __device__(auto value) { return value_to_group_id_op(value); }); + rmm::device_uvector d_tx_dst_ranks(num_groups, stream); + rmm::device_uvector d_tx_value_counts(d_tx_dst_ranks.size(), stream); + auto last = + thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + group_id_first, + group_id_first + thrust::distance(tx_value_first, tx_value_last), + thrust::make_constant_iterator(size_t{1}), + d_tx_dst_ranks.begin(), + d_tx_value_counts.begin()); + if (thrust::distance(d_tx_dst_ranks.begin(), thrust::get<0>(last)) < num_groups) { + rmm::device_uvector d_counts(num_groups, stream); + thrust::fill(rmm::exec_policy(stream)->on(stream), d_counts.begin(), d_counts.end(), size_t{0}); + thrust::scatter(rmm::exec_policy(stream)->on(stream), + d_tx_value_counts.begin(), + thrust::get<1>(last), + d_tx_dst_ranks.begin(), + d_counts.begin()); + d_tx_value_counts = 
std::move(d_counts); + } + + return d_tx_value_counts; +} + +template +rmm::device_uvector groupby_and_count(VertexIterator tx_key_first /* [INOUT */, + VertexIterator tx_key_last /* [INOUT */, + ValueIterator tx_value_first /* [INOUT */, + KeyToGPUIdOp key_to_group_id_op, + int num_groups, + cudaStream_t stream) +{ + thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), + tx_key_first, + tx_key_last, + tx_value_first, + [key_to_group_id_op] __device__(auto lhs, auto rhs) { + return key_to_group_id_op(lhs) < key_to_group_id_op(rhs); + }); + + auto group_id_first = thrust::make_transform_iterator( + tx_key_first, [key_to_group_id_op] __device__(auto key) { return key_to_group_id_op(key); }); + rmm::device_uvector d_tx_dst_ranks(num_groups, stream); + rmm::device_uvector d_tx_value_counts(d_tx_dst_ranks.size(), stream); + auto last = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + group_id_first, + group_id_first + thrust::distance(tx_key_first, tx_key_last), + thrust::make_constant_iterator(size_t{1}), + d_tx_dst_ranks.begin(), + d_tx_value_counts.begin()); + if (thrust::distance(d_tx_dst_ranks.begin(), thrust::get<0>(last)) < num_groups) { + rmm::device_uvector d_counts(num_groups, stream); + thrust::fill(rmm::exec_policy(stream)->on(stream), d_counts.begin(), d_counts.end(), size_t{0}); + thrust::scatter(rmm::exec_policy(stream)->on(stream), + d_tx_value_counts.begin(), + thrust::get<1>(last), + d_tx_dst_ranks.begin(), + d_counts.begin()); + d_tx_value_counts = std::move(d_counts); + } + + return d_tx_value_counts; +} + template auto shuffle_values(raft::comms::comms_t const &comm, TxValueIterator tx_value_first, @@ -250,7 +253,7 @@ auto groupby_gpuid_and_shuffle_values(raft::comms::comms_t const &comm, auto const comm_size = comm.get_size(); auto d_tx_value_counts = - detail::sort_and_count(comm, tx_value_first, tx_value_last, value_to_gpu_id_op, stream); + groupby_and_count(tx_value_first, tx_value_last, value_to_gpu_id_op, comm.get_size(), stream); std::vector tx_counts{}; std::vector tx_offsets{}; @@ -301,8 +304,8 @@ auto groupby_gpuid_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, { auto const comm_size = comm.get_size(); - auto d_tx_value_counts = detail::sort_and_count( - comm, tx_key_first, tx_key_last, tx_value_first, key_to_gpu_id_op, stream); + auto d_tx_value_counts = groupby_and_count( + tx_key_first, tx_key_last, tx_value_first, key_to_gpu_id_op, comm.get_size(), stream); std::vector tx_counts{}; std::vector tx_offsets{}; diff --git a/cpp/include/utilities/thrust_tuple_utils.cuh b/cpp/include/utilities/thrust_tuple_utils.cuh index 01843a583eb..d5ce6ff1a29 100644 --- a/cpp/include/utilities/thrust_tuple_utils.cuh +++ b/cpp/include/utilities/thrust_tuple_utils.cuh @@ -61,13 +61,6 @@ struct compute_thrust_tuple_element_sizes_impl { void compute(std::array::value>& arr) const {} }; -template -__device__ constexpr auto remove_first_thrust_tuple_element_impl(TupleType const& tuple, - std::index_sequence) -{ - return thrust::make_tuple(thrust::get<1 + Is>(tuple)...); -} - template struct plus_thrust_tuple_impl { __host__ __device__ constexpr void compute(TupleType& lhs, TupleType const& rhs) const @@ -200,16 +193,6 @@ struct compute_thrust_tuple_element_sizes { } }; -template -struct remove_first_thrust_tuple_element { - __device__ constexpr auto operator()(TupleType const& tuple) const - { - size_t constexpr tuple_size = thrust::tuple_size::value; - return detail::remove_first_thrust_tuple_element_impl( - tuple, std::make_index_sequence()); - } -}; - 
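
The groupby_and_count() overloads promoted out of the detail namespace above sort the values (or key/value pairs) by group id and return one count per group, scattering zeros for groups that receive no elements. A minimal host-side analogue, with illustrative names and std:: algorithms in place of the thrust calls:

#include <algorithm>
#include <cstddef>
#include <vector>

// Host-side analogue of groupby_and_count(): reorder the values so that each
// group's values are contiguous, and return one count per group (zero for
// empty groups, mirroring the thrust::scatter fix-up in the device code).
template <typename T, typename ValueToGroupIdOp>
std::vector<std::size_t> groupby_and_count_host(std::vector<T>& values,
                                                ValueToGroupIdOp value_to_group_id_op,
                                                int num_groups)
{
  std::stable_sort(values.begin(), values.end(), [&](T const& lhs, T const& rhs) {
    return value_to_group_id_op(lhs) < value_to_group_id_op(rhs);
  });
  std::vector<std::size_t> counts(static_cast<std::size_t>(num_groups), 0);
  for (auto const& v : values) { ++counts[static_cast<std::size_t>(value_to_group_id_op(v))]; }
  return counts;
}

// Usage sketch (hypothetical edge type): with value_to_group_id_op mapping an
// edge to the GPU id of its destination partition and num_groups == comm_size,
// the returned counts play the role of the tx_counts later consumed by
// shuffle_values() inside groupby_gpuid_and_shuffle_values().
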
template struct plus_thrust_tuple { __host__ __device__ constexpr TupleType operator()(TupleType const& lhs, diff --git a/cpp/src/community/ecg.cu b/cpp/src/community/ecg.cu index 45f7d723191..a176dfbd1c8 100644 --- a/cpp/src/community/ecg.cu +++ b/cpp/src/community/ecg.cu @@ -117,7 +117,7 @@ class EcgLouvain : public cugraph::Louvain { void initialize_dendrogram_level(vertex_t num_vertices) override { - this->dendrogram_->add_level(0, num_vertices); + this->dendrogram_->add_level(0, num_vertices, this->stream_); get_permutation_vector( num_vertices, seed_, this->dendrogram_->current_level_begin(), this->stream_); diff --git a/cpp/src/community/leiden.cuh b/cpp/src/community/leiden.cuh index aae2d3712b5..4ffb7c20eb2 100644 --- a/cpp/src/community/leiden.cuh +++ b/cpp/src/community/leiden.cuh @@ -132,7 +132,7 @@ class Leiden : public Louvain { // // Initialize every cluster to reference each vertex to itself // - this->dendrogram_->add_level(0, current_graph.number_of_vertices); + this->dendrogram_->add_level(0, current_graph.number_of_vertices, this->stream_); thrust::sequence(rmm::exec_policy(this->stream_)->on(this->stream_), this->dendrogram_->current_level_begin(), diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index 0862bbc62a9..e3569d4c850 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -210,7 +210,7 @@ class Louvain { virtual void initialize_dendrogram_level(vertex_t num_vertices) { - dendrogram_->add_level(0, num_vertices); + dendrogram_->add_level(0, num_vertices, stream_); thrust::sequence(rmm::exec_policy(stream_)->on(stream_), dendrogram_->current_level_begin(), diff --git a/cpp/src/experimental/bfs.cu b/cpp/src/experimental/bfs.cu index 7adfbd7fbd7..9145e3737b6 100644 --- a/cpp/src/experimental/bfs.cu +++ b/cpp/src/experimental/bfs.cu @@ -93,10 +93,7 @@ void bfs(raft::handle_t const &handle, enum class Bucket { cur, num_buckets }; std::vector bucket_sizes(static_cast(Bucket::num_buckets), push_graph_view.get_number_of_local_vertices()); - VertexFrontier, - vertex_t, - GraphViewType::is_multi_gpu, - static_cast(Bucket::num_buckets)> + VertexFrontier(Bucket::num_buckets)> vertex_frontier(handle, bucket_sizes); if (push_graph_view.is_local_vertex_nocheck(source_vertex)) { @@ -133,19 +130,16 @@ void bfs(raft::handle_t const &handle, *(distances + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(dst)); if (distance != invalid_distance) { push = false; } } - // FIXME: need to test this works properly if payload size is 0 (returns a tuple of size - // 1) return thrust::make_tuple(push, src); }, - reduce_op::any>(), + reduce_op::any(), distances, thrust::make_zip_iterator(thrust::make_tuple(distances, predecessor_first)), vertex_frontier, [depth] __device__(auto v_val, auto pushed_val) { - auto idx = (v_val == invalid_distance) - ? static_cast(Bucket::cur) - : VertexFrontier, vertex_t>::kInvalidBucketIdx; - return thrust::make_tuple(idx, depth + 1, thrust::get<0>(pushed_val)); + auto idx = (v_val == invalid_distance) ? 
static_cast(Bucket::cur) + : VertexFrontier::kInvalidBucketIdx; + return thrust::make_tuple(idx, thrust::make_tuple(depth + 1, pushed_val)); }); auto new_vertex_frontier_aggregate_size = diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 0cd551b0d73..1eccbd23584 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -49,6 +50,7 @@ std:: weight_t const *compressed_sparse_weights, vertex_t major_first, vertex_t major_last, + bool is_weighted, cudaStream_t stream) { edge_t number_of_edges{0}; @@ -57,8 +59,7 @@ std:: CUDA_TRY(cudaStreamSynchronize(stream)); rmm::device_uvector edgelist_major_vertices(number_of_edges, stream); rmm::device_uvector edgelist_minor_vertices(number_of_edges, stream); - rmm::device_uvector edgelist_weights( - compressed_sparse_weights != nullptr ? number_of_edges : 0, stream); + rmm::device_uvector edgelist_weights(is_weighted ? number_of_edges : 0, stream); // FIXME: this is highly inefficient for very high-degree vertices, for better performance, we can // fill high-degree vertices using one CUDA block per vertex, mid-degree vertices using one CUDA @@ -77,7 +78,7 @@ std:: compressed_sparse_indices, compressed_sparse_indices + number_of_edges, edgelist_minor_vertices.begin()); - if (compressed_sparse_weights != nullptr) { + if (is_weighted) { thrust::copy(rmm::exec_policy(stream)->on(stream), compressed_sparse_weights, compressed_sparse_weights + number_of_edges, @@ -89,62 +90,62 @@ std:: std::move(edgelist_weights)); } -template -void sort_and_coarsen_edgelist(rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, - rmm::device_uvector &edgelist_weights /* [INOUT] */, - cudaStream_t stream) +template +edge_t groupby_e_and_coarsen_edgelist(vertex_t *edgelist_major_vertices /* [INOUT] */, + vertex_t *edgelist_minor_vertices /* [INOUT] */, + weight_t *edgelist_weights /* [INOUT] */, + edge_t number_of_edges, + bool is_weighted, + cudaStream_t stream) { - auto pair_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + auto pair_first = + thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices)); - size_t number_of_edges{0}; - if (edgelist_weights.size() > 0) { + if (is_weighted) { thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), pair_first, - pair_first + edgelist_major_vertices.size(), - edgelist_weights.begin()); + pair_first + number_of_edges, + edgelist_weights); - rmm::device_uvector tmp_edgelist_major_vertices(edgelist_major_vertices.size(), - stream); + rmm::device_uvector tmp_edgelist_major_vertices(number_of_edges, stream); rmm::device_uvector tmp_edgelist_minor_vertices(tmp_edgelist_major_vertices.size(), stream); rmm::device_uvector tmp_edgelist_weights(tmp_edgelist_major_vertices.size(), stream); auto it = thrust::reduce_by_key( rmm::exec_policy(stream)->on(stream), pair_first, - pair_first + edgelist_major_vertices.size(), - edgelist_weights.begin(), + pair_first + number_of_edges, + edgelist_weights, thrust::make_zip_iterator(thrust::make_tuple(tmp_edgelist_major_vertices.begin(), tmp_edgelist_minor_vertices.begin())), tmp_edgelist_weights.begin()); - number_of_edges = thrust::distance(tmp_edgelist_weights.begin(), thrust::get<1>(it)); + auto ret = + static_cast(thrust::distance(tmp_edgelist_weights.begin(), 
thrust::get<1>(it))); - edgelist_major_vertices = std::move(tmp_edgelist_major_vertices); - edgelist_minor_vertices = std::move(tmp_edgelist_minor_vertices); - edgelist_weights = std::move(tmp_edgelist_weights); + auto edge_first = + thrust::make_zip_iterator(thrust::make_tuple(tmp_edgelist_major_vertices.begin(), + tmp_edgelist_minor_vertices.begin(), + tmp_edgelist_weights.begin())); + thrust::copy(rmm::exec_policy(stream)->on(stream), + edge_first, + edge_first + ret, + thrust::make_zip_iterator(thrust::make_tuple( + edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights))); + + return ret; } else { - thrust::sort(rmm::exec_policy(stream)->on(stream), - pair_first, - pair_first + edgelist_major_vertices.size()); - auto it = thrust::unique(rmm::exec_policy(stream)->on(stream), - pair_first, - pair_first + edgelist_major_vertices.size()); - number_of_edges = thrust::distance(pair_first, it); + thrust::sort(rmm::exec_policy(stream)->on(stream), pair_first, pair_first + number_of_edges); + return static_cast(thrust::distance( + pair_first, + thrust::unique( + rmm::exec_policy(stream)->on(stream), pair_first, pair_first + number_of_edges))); } - - edgelist_major_vertices.resize(number_of_edges, stream); - edgelist_minor_vertices.resize(number_of_edges, stream); - edgelist_weights.resize(number_of_edges, stream); - edgelist_major_vertices.shrink_to_fit(stream); - edgelist_minor_vertices.shrink_to_fit(stream); - edgelist_weights.shrink_to_fit(stream); } template std:: tuple, rmm::device_uvector, rmm::device_uvector> - compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + compressed_sparse_to_relabeled_and_grouped_and_coarsened_edgelist( edge_t const *compressed_sparse_offsets, vertex_t const *compressed_sparse_indices, weight_t const *compressed_sparse_weights, @@ -154,6 +155,7 @@ std:: vertex_t major_last, vertex_t minor_first, vertex_t minor_last, + bool is_weighted, cudaStream_t stream) { // FIXME: it might be possible to directly create relabled & coarsened edgelist from the @@ -168,6 +170,7 @@ std:: compressed_sparse_weights, major_first, major_last, + is_weighted, stream); auto pair_first = thrust::make_zip_iterator( @@ -182,8 +185,21 @@ std:: p_minor_labels[thrust::get<1>(val) - minor_first]); }); - sort_and_coarsen_edgelist( - edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights, stream); + auto number_of_edges = + groupby_e_and_coarsen_edgelist(edgelist_major_vertices.data(), + edgelist_minor_vertices.data(), + edgelist_weights.data(), + static_cast(edgelist_major_vertices.size()), + is_weighted, + stream); + edgelist_major_vertices.resize(number_of_edges, stream); + edgelist_major_vertices.shrink_to_fit(stream); + edgelist_minor_vertices.resize(number_of_edges, stream); + edgelist_minor_vertices.shrink_to_fit(stream); + if (is_weighted) { + edgelist_weights.resize(number_of_edges, stream); + edgelist_weights.shrink_to_fit(stream); + } return std::make_tuple(std::move(edgelist_major_vertices), std::move(edgelist_minor_vertices), @@ -220,48 +236,66 @@ coarsen_graph( // currently, nothing to do } - // 1. locally construct coarsened edge list + // 1. construct coarsened edge list - // FIXME: we don't need adj_matrix_major_labels if we apply the same partitioning scheme - // regardless of hypergraph partitioning is applied or not - rmm::device_uvector adj_matrix_major_labels( - store_transposed ? 
graph_view.get_number_of_local_adj_matrix_partition_cols() - : graph_view.get_number_of_local_adj_matrix_partition_rows(), - handle.get_stream()); rmm::device_uvector adj_matrix_minor_labels( store_transposed ? graph_view.get_number_of_local_adj_matrix_partition_rows() : graph_view.get_number_of_local_adj_matrix_partition_cols(), handle.get_stream()); if (store_transposed) { - copy_to_adj_matrix_col(handle, graph_view, labels, adj_matrix_major_labels.data()); copy_to_adj_matrix_row(handle, graph_view, labels, adj_matrix_minor_labels.data()); } else { - copy_to_adj_matrix_row(handle, graph_view, labels, adj_matrix_major_labels.data()); copy_to_adj_matrix_col(handle, graph_view, labels, adj_matrix_minor_labels.data()); } - rmm::device_uvector coarsened_edgelist_major_vertices(0, handle.get_stream()); - rmm::device_uvector coarsened_edgelist_minor_vertices(0, handle.get_stream()); - rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); + std::vector> coarsened_edgelist_major_vertices{}; + std::vector> coarsened_edgelist_minor_vertices{}; + std::vector> coarsened_edgelist_weights{}; + coarsened_edgelist_major_vertices.reserve(graph_view.get_number_of_local_adj_matrix_partitions()); + coarsened_edgelist_minor_vertices.reserve(coarsened_edgelist_major_vertices.size()); + coarsened_edgelist_weights.reserve( + graph_view.is_weighted() ? coarsened_edgelist_major_vertices.size() : size_t{0}); + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + coarsened_edgelist_major_vertices.emplace_back(0, handle.get_stream()); + coarsened_edgelist_minor_vertices.emplace_back(0, handle.get_stream()); + if (graph_view.is_weighted()) { + coarsened_edgelist_weights.emplace_back(0, handle.get_stream()); + } + } // FIXME: we may compare performance/memory footprint with the hash_based approach especially when // cuco::dynamic_map becomes available (so we don't need to preallocate memory assuming the worst // case). We may be able to limit the memory requirement close to the final coarsened edgelist // with the hash based approach. for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { - // get edge list + // 1-1. locally construct coarsened edge list + + rmm::device_uvector major_labels( + store_transposed ? graph_view.get_number_of_local_adj_matrix_partition_cols(i) + : graph_view.get_number_of_local_adj_matrix_partition_rows(i), + handle.get_stream()); + // FIXME: this copy is unnecessary, beter fix RAFT comm's bcast to take const iterators for + // input + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels, + labels + major_labels.size(), + major_labels.begin()); + device_bcast(col_comm, + major_labels.data(), + major_labels.data(), + major_labels.size(), + static_cast(i), + handle.get_stream()); rmm::device_uvector edgelist_major_vertices(0, handle.get_stream()); rmm::device_uvector edgelist_minor_vertices(0, handle.get_stream()); rmm::device_uvector edgelist_weights(0, handle.get_stream()); std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = - compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + compressed_sparse_to_relabeled_and_grouped_and_coarsened_edgelist( graph_view.offsets(i), graph_view.indices(i), graph_view.weights(i), - adj_matrix_major_labels.begin() + - (store_transposed ? 
graph_view.get_local_adj_matrix_partition_col_value_start_offset(i) - : graph_view.get_local_adj_matrix_partition_row_value_start_offset(i)), - adj_matrix_minor_labels.begin(), + major_labels.data(), + adj_matrix_minor_labels.data(), store_transposed ? graph_view.get_local_adj_matrix_partition_col_first(i) : graph_view.get_local_adj_matrix_partition_row_first(i), store_transposed ? graph_view.get_local_adj_matrix_partition_col_last(i) @@ -270,86 +304,159 @@ coarsen_graph( : graph_view.get_local_adj_matrix_partition_col_first(i), store_transposed ? graph_view.get_local_adj_matrix_partition_row_last(i) : graph_view.get_local_adj_matrix_partition_col_last(i), + graph_view.is_weighted(), handle.get_stream()); - auto cur_size = coarsened_edgelist_major_vertices.size(); - // FIXME: this can lead to frequent costly reallocation; we may be able to avoid this if we can - // reserve address space to avoid expensive reallocation. - // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management - coarsened_edgelist_major_vertices.resize(cur_size + edgelist_major_vertices.size(), - handle.get_stream()); - coarsened_edgelist_minor_vertices.resize(coarsened_edgelist_major_vertices.size(), - handle.get_stream()); - coarsened_edgelist_weights.resize( - graph_view.is_weighted() ? coarsened_edgelist_major_vertices.size() : 0, handle.get_stream()); - - if (graph_view.is_weighted()) { - auto src_edge_first = - thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices.begin(), - edgelist_minor_vertices.begin(), - edgelist_weights.begin())); - auto dst_edge_first = - thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), - coarsened_edgelist_minor_vertices.begin(), - coarsened_edgelist_weights.begin())) + - cur_size; - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - src_edge_first, - src_edge_first + edgelist_major_vertices.size(), - dst_edge_first); - } else { - auto src_edge_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); - auto dst_edge_first = - thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), - coarsened_edgelist_minor_vertices.begin())) + - cur_size; - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - src_edge_first, - src_edge_first + edgelist_major_vertices.size(), - dst_edge_first); + // 1-2. 
globaly shuffle + + { + rmm::device_uvector rx_edgelist_major_vertices(0, handle.get_stream()); + rmm::device_uvector rx_edgelist_minor_vertices(0, handle.get_stream()); + rmm::device_uvector rx_edgelist_weights(0, handle.get_stream()); + if (graph_view.is_weighted()) { + auto edge_first = + thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices.begin(), + edgelist_minor_vertices.begin(), + edgelist_weights.begin())); + std::forward_as_tuple( + std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights), + std::ignore) = + groupby_gpuid_and_shuffle_values( + handle.get_comms(), + edge_first, + edge_first + edgelist_major_vertices.size(), + [key_func = + detail::compute_gpu_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } else { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + std::forward_as_tuple(std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices), + std::ignore) = + groupby_gpuid_and_shuffle_values( + handle.get_comms(), + edge_first, + edge_first + edgelist_major_vertices.size(), + [key_func = + detail::compute_gpu_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } + + edgelist_major_vertices = std::move(rx_edgelist_major_vertices); + edgelist_minor_vertices = std::move(rx_edgelist_minor_vertices); + edgelist_weights = std::move(rx_edgelist_weights); } - } - sort_and_coarsen_edgelist(coarsened_edgelist_major_vertices, - coarsened_edgelist_minor_vertices, - coarsened_edgelist_weights, - handle.get_stream()); - - // 2. globally shuffle edge list and re-coarsen - - { - auto edge_first = - thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), - coarsened_edgelist_minor_vertices.begin(), - coarsened_edgelist_weights.begin())); - rmm::device_uvector rx_edgelist_major_vertices(0, handle.get_stream()); - rmm::device_uvector rx_edgelist_minor_vertices(0, handle.get_stream()); - rmm::device_uvector rx_edgelist_weights(0, handle.get_stream()); - std::forward_as_tuple( - std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights), - std::ignore) = - groupby_gpuid_and_shuffle_values( - handle.get_comms(), - edge_first, - edge_first + coarsened_edgelist_major_vertices.size(), - [key_func = - detail::compute_gpu_id_from_edge_t{graph_view.is_hypergraph_partitioned(), - comm.get_size(), - row_comm.get_size(), - col_comm.get_size()}] __device__(auto val) { - return key_func(thrust::get<0>(val), thrust::get<1>(val)); - }, + // 1-3. append data to local adjacency matrix partitions + + // FIXME: we can skip this if groupby_gpuid_and_shuffle_values is updated to return sorted edge + // list based on the final matrix partition (maybe add + // groupby_adj_matrix_partition_and_shuffle_values). + + auto local_partition_id_op = + [comm_size, + key_func = detail::compute_partition_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto pair) { + return key_func(thrust::get<0>(pair), thrust::get<1>(pair)) / + comm_size; // global partition id to local partition id + }; + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + auto counts = graph_view.is_weighted() + ? 
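
// A host-side sketch (not the cuGraph implementation) of what the groupby_and_count
// call above is used for here: reorder the edge list so edges that belong to the same
// local adjacency matrix partition are contiguous, and report how many edges each
// partition received. The modulo-based partition op below is only a toy stand-in for
// detail::compute_partition_id_from_edge_t / local_partition_id_op.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
  using vertex_t = int32_t;
  struct edge { vertex_t major; vertex_t minor; };

  size_t const num_partitions = 2;  // e.g. col_comm_size
  auto partition_of = [num_partitions](edge const& e) {
    return static_cast<size_t>(e.major) % num_partitions;  // toy partition op
  };

  std::vector<edge> edges{{0, 1}, {3, 2}, {2, 0}, {1, 3}};

  // group edges by partition id (a stable sort keeps the sketch simple)
  std::stable_sort(edges.begin(), edges.end(), [&](edge const& a, edge const& b) {
    return partition_of(a) < partition_of(b);
  });

  // per-partition edge counts, the analogue of the returned 'counts' vector
  std::vector<size_t> counts(num_partitions, 0);
  for (auto const& e : edges) { ++counts[partition_of(e)]; }

  for (size_t p = 0; p < num_partitions; ++p) {
    std::printf("partition %zu: %zu edges\n", p, counts[p]);
  }
  return 0;
}
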
groupby_and_count(pair_first, + pair_first + edgelist_major_vertices.size(), + edgelist_weights.begin(), + local_partition_id_op, + graph_view.get_number_of_local_adj_matrix_partitions(), + handle.get_stream()) + : groupby_and_count(pair_first, + pair_first + edgelist_major_vertices.size(), + local_partition_id_op, + graph_view.get_number_of_local_adj_matrix_partitions(), + handle.get_stream()); + + std::vector h_counts(counts.size()); + raft::update_host(h_counts.data(), counts.data(), counts.size(), handle.get_stream()); + handle.get_stream_view().synchronize(); + + std::vector h_displacements(h_counts.size(), size_t{0}); + std::partial_sum(h_counts.begin(), h_counts.end() - 1, h_displacements.begin() + 1); + + for (int j = 0; j < col_comm_size; ++j) { + auto number_of_partition_edges = groupby_e_and_coarsen_edgelist( + edgelist_major_vertices.begin() + h_displacements[j], + edgelist_minor_vertices.begin() + h_displacements[j], + graph_view.is_weighted() ? edgelist_weights.begin() + h_displacements[j] + : static_cast(nullptr), + h_counts[j], + graph_view.is_weighted(), handle.get_stream()); - sort_and_coarsen_edgelist(rx_edgelist_major_vertices, - rx_edgelist_minor_vertices, - rx_edgelist_weights, - handle.get_stream()); + auto cur_size = coarsened_edgelist_major_vertices[j].size(); + // FIXME: this can lead to frequent costly reallocation; we may be able to avoid this if we + // can reserve address space to avoid expensive reallocation. + // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management + coarsened_edgelist_major_vertices[j].resize(cur_size + number_of_partition_edges, + handle.get_stream()); + coarsened_edgelist_minor_vertices[j].resize(coarsened_edgelist_major_vertices[j].size(), + handle.get_stream()); + if (graph_view.is_weighted()) { + coarsened_edgelist_weights[j].resize(coarsened_edgelist_major_vertices[j].size(), + handle.get_stream()); + + auto src_edge_first = + thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices.begin(), + edgelist_minor_vertices.begin(), + edgelist_weights.begin())) + + h_displacements[j]; + auto dst_edge_first = + thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices[j].begin(), + coarsened_edgelist_minor_vertices[j].begin(), + coarsened_edgelist_weights[j].begin())) + + cur_size; + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + src_edge_first, + src_edge_first + number_of_partition_edges, + dst_edge_first); + } else { + auto src_edge_first = thrust::make_zip_iterator(thrust::make_tuple( + edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())) + + h_displacements[j]; + auto dst_edge_first = thrust::make_zip_iterator( + thrust::make_tuple(coarsened_edgelist_major_vertices[j].begin(), + coarsened_edgelist_minor_vertices[j].begin())) + + cur_size; + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + src_edge_first, + src_edge_first + edgelist_major_vertices.size(), + dst_edge_first); + } + } + } - coarsened_edgelist_major_vertices = std::move(rx_edgelist_major_vertices); - coarsened_edgelist_minor_vertices = std::move(rx_edgelist_minor_vertices); - coarsened_edgelist_weights = std::move(rx_edgelist_weights); + for (size_t i = 0; i < coarsened_edgelist_major_vertices.size(); ++i) { + auto number_of_partition_edges = groupby_e_and_coarsen_edgelist( + coarsened_edgelist_major_vertices[i].data(), + coarsened_edgelist_minor_vertices[i].data(), + graph_view.is_weighted() ? 
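
// The h_displacements computation above is an exclusive prefix sum spelled with
// std::partial_sum: accumulating counts[0..n-2] into displacements[1..n-1] leaves
// displacements[0] == 0, so displacements[j] is the offset of partition j's first edge
// within the grouped edge list. A minimal illustration with assumed counts:
#include <cstddef>
#include <cstdio>
#include <numeric>
#include <vector>

int main()
{
  std::vector<size_t> h_counts{3, 0, 5, 2};
  std::vector<size_t> h_displacements(h_counts.size(), 0);
  std::partial_sum(h_counts.begin(), h_counts.end() - 1, h_displacements.begin() + 1);

  for (auto d : h_displacements) { std::printf("%zu ", d); }  // prints: 0 3 3 8
  std::printf("\n");
  return 0;
}
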
coarsened_edgelist_weights[i].data() + : static_cast(nullptr), + static_cast(coarsened_edgelist_major_vertices[i].size()), + graph_view.is_weighted(), + handle.get_stream()); + coarsened_edgelist_major_vertices[i].resize(number_of_partition_edges, handle.get_stream()); + coarsened_edgelist_major_vertices[i].shrink_to_fit(handle.get_stream()); + coarsened_edgelist_minor_vertices[i].resize(number_of_partition_edges, handle.get_stream()); + coarsened_edgelist_minor_vertices[i].shrink_to_fit(handle.get_stream()); + if (coarsened_edgelist_weights.size() > 0) { + coarsened_edgelist_weights[i].resize(number_of_partition_edges, handle.get_stream()); + coarsened_edgelist_weights[i].shrink_to_fit(handle.get_stream()); + } } // 3. find unique labels for this GPU @@ -395,37 +502,43 @@ coarsen_graph( rmm::device_uvector renumber_map_labels(0, handle.get_stream()); partition_t partition(std::vector(comm_size + 1, 0), - graph_view.is_hypergraph_partitioned(), row_comm_size, col_comm_size, row_comm_rank, col_comm_rank); vertex_t number_of_vertices{}; edge_t number_of_edges{}; - std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = - renumber_edgelist( - handle, - unique_labels.data(), - static_cast(unique_labels.size()), - coarsened_edgelist_major_vertices.data(), - coarsened_edgelist_minor_vertices.data(), - static_cast(coarsened_edgelist_major_vertices.size()), - graph_view.is_hypergraph_partitioned(), - do_expensive_check); + { + std::vector major_ptrs(coarsened_edgelist_major_vertices.size()); + std::vector minor_ptrs(major_ptrs.size()); + std::vector counts(major_ptrs.size()); + for (size_t i = 0; i < coarsened_edgelist_major_vertices.size(); ++i) { + major_ptrs[i] = coarsened_edgelist_major_vertices[i].data(); + minor_ptrs[i] = coarsened_edgelist_minor_vertices[i].data(); + counts[i] = static_cast(coarsened_edgelist_major_vertices[i].size()); + } + std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = + renumber_edgelist(handle, + unique_labels.data(), + static_cast(unique_labels.size()), + major_ptrs, + minor_ptrs, + counts, + do_expensive_check); + } // 5. build a graph std::vector> edgelists{}; - if (graph_view.is_hypergraph_partitioned()) { - CUGRAPH_FAIL("unimplemented."); - } else { - edgelists.resize(1); - edgelists[0].p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() - : coarsened_edgelist_major_vertices.data(); - edgelists[0].p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() - : coarsened_edgelist_minor_vertices.data(); - edgelists[0].p_edge_weights = coarsened_edgelist_weights.data(); - edgelists[0].number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); + edgelists.resize(graph_view.get_number_of_local_adj_matrix_partitions()); + for (size_t i = 0; i < edgelists.size(); ++i) { + edgelists[i].p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices[i].data() + : coarsened_edgelist_major_vertices[i].data(); + edgelists[i].p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices[i].data() + : coarsened_edgelist_minor_vertices[i].data(); + edgelists[i].p_edge_weights = graph_view.is_weighted() ? 
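
// renumber_edgelist above now receives one (pointer, edge count) entry per local
// adjacency matrix partition instead of a single flat edge list. A host-side sketch of
// how such parallel pointer/count vectors are assembled from per-partition containers
// (std::vector stands in for rmm::device_uvector purely for illustration):
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
  using vertex_t = int32_t;
  using edge_t   = int64_t;

  std::vector<std::vector<vertex_t>> per_partition_majors{{0, 1}, {2, 2, 3}};

  std::vector<vertex_t*> major_ptrs(per_partition_majors.size());
  std::vector<edge_t> counts(per_partition_majors.size());
  for (size_t i = 0; i < per_partition_majors.size(); ++i) {
    major_ptrs[i] = per_partition_majors[i].data();
    counts[i]     = static_cast<edge_t>(per_partition_majors[i].size());
  }

  for (size_t i = 0; i < major_ptrs.size(); ++i) {
    std::printf("partition %zu: %lld edges\n", i, static_cast<long long>(counts[i]));
  }
  return 0;
}
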
coarsened_edgelist_weights[i].data() + : static_cast(nullptr); + edgelists[i].number_of_edges = static_cast(coarsened_edgelist_major_vertices[i].size()); } return std::make_tuple( @@ -435,7 +548,7 @@ coarsen_graph( partition, number_of_vertices, number_of_edges, - graph_properties_t{graph_view.is_symmetric(), false}, + graph_properties_t{graph_view.is_symmetric(), false, graph_view.is_weighted()}, true), std::move(renumber_map_labels)); } @@ -466,7 +579,7 @@ coarsen_graph( std::tie(coarsened_edgelist_major_vertices, coarsened_edgelist_minor_vertices, coarsened_edgelist_weights) = - compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + compressed_sparse_to_relabeled_and_grouped_and_coarsened_edgelist( graph_view.offsets(), graph_view.indices(), graph_view.weights(), @@ -476,6 +589,7 @@ coarsen_graph( graph_view.get_number_of_vertices(), vertex_t{0}, graph_view.get_number_of_vertices(), + graph_view.is_weighted(), handle.get_stream()); rmm::device_uvector unique_labels(graph_view.get_number_of_vertices(), @@ -516,7 +630,7 @@ coarsen_graph( handle, edgelist, static_cast(renumber_map_labels.size()), - graph_properties_t{graph_view.is_symmetric(), false}, + graph_properties_t{graph_view.is_symmetric(), false, graph_view.is_weighted()}, true), std::move(renumber_map_labels)); } diff --git a/cpp/src/experimental/generate_rmat_edgelist.cu b/cpp/src/experimental/generate_rmat_edgelist.cu index 185fa837a70..d75a4654a15 100644 --- a/cpp/src/experimental/generate_rmat_edgelist.cu +++ b/cpp/src/experimental/generate_rmat_edgelist.cu @@ -46,13 +46,13 @@ std::tuple, rmm::device_uvector> generat bool clip_and_flip, bool scramble_vertex_ids) { - CUGRAPH_EXPECTS(size_t{1} << scale <= std::numeric_limits::max(), + CUGRAPH_EXPECTS((size_t{1} << scale) <= static_cast(std::numeric_limits::max()), "Invalid input argument: scale too large for vertex_t."); CUGRAPH_EXPECTS((a >= 0.0) && (b >= 0.0) && (c >= 0.0) && (a + b + c <= 1.0), "Invalid input argument: a, b, c should be non-negative and a + b + c should not " "be larger than 1.0."); - raft::random::Rng rng(seed + 10); + raft::random::Rng rng(seed); // to limit memory footprint (1024 is a tuning parameter) auto max_edges_to_generate_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * 1024; diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 5abe141dafd..47c41cb3426 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -67,12 +67,12 @@ std:: vertex_t major_last, vertex_t minor_first, vertex_t minor_last, + bool is_weighted, cudaStream_t stream) { rmm::device_uvector offsets((major_last - major_first) + 1, stream); rmm::device_uvector indices(edgelist.number_of_edges, stream); - rmm::device_uvector weights( - edgelist.p_edge_weights != nullptr ? edgelist.number_of_edges : 0, stream); + rmm::device_uvector weights(is_weighted ? edgelist.number_of_edges : 0, stream); thrust::fill(rmm::exec_policy(stream)->on(stream), offsets.begin(), offsets.end(), edge_t{0}); thrust::fill(rmm::exec_policy(stream)->on(stream), indices.begin(), indices.end(), vertex_t{0}); @@ -89,8 +89,7 @@ std:: auto p_offsets = offsets.data(); auto p_indices = indices.data(); - auto p_weights = - edgelist.p_edge_weights != nullptr ? weights.data() : static_cast(nullptr); + auto p_weights = is_weighted ? weights.data() : static_cast(nullptr); thrust::for_each(rmm::exec_policy(stream)->on(stream), store_transposed ? 
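
// The reworked guard in generate_rmat_edgelist above makes the intended grouping of
// the shift explicit and casts the vertex_t limit to size_t so both sides of the
// comparison have the same (unsigned) type. A quick host-side check of the same
// predicate, assuming vertex_t = int32_t:
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <limits>

int main()
{
  using vertex_t = int32_t;
  for (size_t scale : {30, 31, 32}) {
    bool const ok =
      (size_t{1} << scale) <= static_cast<size_t>(std::numeric_limits<vertex_t>::max());
    std::printf("scale %zu -> %s\n", scale, ok ? "ok" : "too large for vertex_t");
  }
  return 0;
}
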
edgelist.p_dst_vertices : edgelist.p_src_vertices, @@ -103,7 +102,7 @@ std:: thrust::exclusive_scan( rmm::exec_policy(stream)->on(stream), offsets.begin(), offsets.end(), offsets.begin()); - if (edgelist.p_edge_weights != nullptr) { + if (is_weighted) { auto edge_first = thrust::make_zip_iterator(thrust::make_tuple( edgelist.p_src_vertices, edgelist.p_dst_vertices, edgelist.p_edge_weights)); thrust::for_each(rmm::exec_policy(stream)->on(stream), @@ -191,24 +190,22 @@ graph_t 0, "Invalid input argument: edgelists.size() should be non-zero."); - bool is_weighted = edgelists[0].p_edge_weights != nullptr; - CUGRAPH_EXPECTS( std::any_of(edgelists.begin() + 1, edgelists.end(), - [is_weighted](auto edgelist) { - return (edgelist.p_src_vertices == nullptr) || - (edgelist.p_dst_vertices == nullptr) || - (is_weighted && (edgelist.p_edge_weights == nullptr)) || + [is_weighted = properties.is_weighted](auto edgelist) { + return ((edgelist.number_of_edges > 0) && (edgelist.p_src_vertices == nullptr)) || + ((edgelist.number_of_edges > 0) && (edgelist.p_dst_vertices == nullptr)) || + (is_weighted && (edgelist.number_of_edges > 0) && + (edgelist.p_edge_weights == nullptr)) || (!is_weighted && (edgelist.p_edge_weights != nullptr)); }) == false, "Invalid input argument: edgelists[].p_src_vertices and edgelists[].p_dst_vertices should not " - "be nullptr and edgelists[].p_edge_weights should be nullptr (if edgelists[0].p_edge_weights " - "is nullptr) or should not be nullptr (otherwise)."); + "be nullptr if edgelists[].number_of_edges > 0 and edgelists[].p_edge_weights should be " + "nullptr if unweighted or should not be nullptr if weighted and edgelists[].number_of_edges > " + "0."); - CUGRAPH_EXPECTS((partition.is_hypergraph_partitioned() && - (edgelists.size() == static_cast(col_comm_size))) || - (!(partition.is_hypergraph_partitioned()) && (edgelists.size() == 1)), + CUGRAPH_EXPECTS(edgelists.size() == static_cast(col_comm_size), "Invalid input argument: errneous edgelists.size()."); // optional expensive checks (part 1/3) @@ -251,7 +248,7 @@ graph_tget_handle_ptr()->get_stream()); adj_matrix_partition_offsets_.push_back(std::move(offsets)); adj_matrix_partition_indices_.push_back(std::move(indices)); - if (is_weighted) { adj_matrix_partition_weights_.push_back(std::move(weights)); } + if (properties.is_weighted) { adj_matrix_partition_weights_.push_back(std::move(weights)); } } // update degree-based segment offsets (to be used for graph analytics kernel optimization) @@ -321,22 +319,12 @@ graph_t aggregate_segment_offsets(0, default_stream); - if (partition.is_hypergraph_partitioned()) { - rmm::device_uvector aggregate_segment_offsets( - col_comm_size * segment_offsets.size(), default_stream); - col_comm.allgather(segment_offsets.data(), - aggregate_segment_offsets.data(), - segment_offsets.size(), - default_stream); - } else { - rmm::device_uvector aggregate_segment_offsets( - row_comm_size * segment_offsets.size(), default_stream); - row_comm.allgather(segment_offsets.data(), - aggregate_segment_offsets.data(), - segment_offsets.size(), - default_stream); - } + rmm::device_uvector aggregate_segment_offsets(col_comm_size * segment_offsets.size(), + default_stream); + col_comm.allgather(segment_offsets.data(), + aggregate_segment_offsets.data(), + segment_offsets.size(), + default_stream); vertex_partition_segment_offsets_.resize(aggregate_segment_offsets.size()); raft::update_host(vertex_partition_segment_offsets_.data(), @@ -344,18 +332,10 @@ graph_tget_handle_ptr()->get_stream(); CUGRAPH_EXPECTS( - 
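
// The strengthened consistency check above reads: whenever a partition actually has
// edges, its source/destination (and, for a weighted graph, weight) pointers must be
// non-null, and an unweighted graph must not carry weight pointers at all. A
// simplified host-side sketch of the same predicate over a toy edge list struct
// (names and types here are illustrative, not the cuGraph definitions):
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct edgelist_view {
  int32_t const* p_src_vertices{nullptr};
  int32_t const* p_dst_vertices{nullptr};
  float const* p_edge_weights{nullptr};
  int64_t number_of_edges{0};
};

bool has_invalid_edgelist(std::vector<edgelist_view> const& edgelists, bool is_weighted)
{
  return std::any_of(edgelists.begin(), edgelists.end(), [is_weighted](edgelist_view const& e) {
    return ((e.number_of_edges > 0) && (e.p_src_vertices == nullptr)) ||
           ((e.number_of_edges > 0) && (e.p_dst_vertices == nullptr)) ||
           (is_weighted && (e.number_of_edges > 0) && (e.p_edge_weights == nullptr)) ||
           (!is_weighted && (e.p_edge_weights != nullptr));
  });
}

int main()
{
  std::vector<edgelist_view> edgelists(2);  // two empty partitions: valid either way
  std::printf("%s\n", has_invalid_edgelist(edgelists, true) ? "invalid" : "valid");
  return 0;
}
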
(edgelist.p_src_vertices != nullptr) && (edgelist.p_dst_vertices != nullptr), + ((edgelist.number_of_edges == 0) || (edgelist.p_src_vertices != nullptr)) && + ((edgelist.number_of_edges == 0) || (edgelist.p_dst_vertices != nullptr)) && + ((properties.is_weighted && + ((edgelist.number_of_edges == 0) || (edgelist.p_edge_weights != nullptr))) || + (!properties.is_weighted && (edgelist.p_edge_weights == nullptr))), "Invalid input argument: edgelist.p_src_vertices and edgelist.p_dst_vertices should " - "not be nullptr."); + "not be nullptr if edgelist.number_of_edges > 0 and edgelist.p_edge_weights should be nullptr " + "if unweighted or should not be nullptr if weighted and edgelist.number_of_edges > 0."); // optional expensive checks (part 1/2) @@ -427,6 +412,7 @@ graph_tget_number_of_vertices(), vertex_t{0}, this->get_number_of_vertices(), + properties.is_weighted, this->get_handle_ptr()->get_stream()); // update degree-based segment offsets (to be used for graph analytics kernel optimization) diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index f443608e424..c6f39a44333 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -195,16 +196,12 @@ graph_view_t(row_comm_size))) || - (!(partition.is_hypergraph_partitioned()) && (adj_matrix_partition_offsets.size() == 1)), - "Internal Error: erroneous adj_matrix_partition_offsets.size()."); + CUGRAPH_EXPECTS(adj_matrix_partition_offsets.size() == static_cast(col_comm_size), + "Internal Error: erroneous adj_matrix_partition_offsets.size()."); CUGRAPH_EXPECTS((sorted_by_global_degree_within_vertex_partition && (vertex_partition_segment_offsets.size() == - (partition.is_hypergraph_partitioned() ? 
col_comm_size : row_comm_size) * - (detail::num_segments_per_vertex_partition + 1))) || + col_comm_size * (detail::num_segments_per_vertex_partition + 1))) || (!sorted_by_global_degree_within_vertex_partition && (vertex_partition_segment_offsets.size() == 0)), "Internal Error: vertex_partition_segment_offsets.size() does not match " @@ -267,8 +264,7 @@ graph_view_t graph_view_t< } } +template +edge_t +graph_view_t>:: + compute_max_in_degree(raft::handle_t const& handle) const +{ + auto in_degrees = compute_in_degrees(handle); + auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + in_degrees.begin(), + in_degrees.end()); + rmm::device_scalar ret(handle.get_stream()); + device_allreduce( + handle.get_comms(), it, ret.data(), 1, raft::comms::op_t::MAX, handle.get_stream()); + return ret.value(handle.get_stream()); +} + +template +edge_t graph_view_t>::compute_max_in_degree(raft::handle_t const& + handle) const +{ + auto in_degrees = compute_in_degrees(handle); + auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + in_degrees.begin(), + in_degrees.end()); + edge_t ret{}; + raft::update_host(&ret, it, 1, handle.get_stream()); + handle.get_stream_view().synchronize(); + return ret; +} + +template +edge_t +graph_view_t>:: + compute_max_out_degree(raft::handle_t const& handle) const +{ + auto out_degrees = compute_out_degrees(handle); + auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + out_degrees.begin(), + out_degrees.end()); + rmm::device_scalar ret(handle.get_stream()); + device_allreduce( + handle.get_comms(), it, ret.data(), 1, raft::comms::op_t::MAX, handle.get_stream()); + return ret.value(handle.get_stream()); +} + +template +edge_t graph_view_t>::compute_max_out_degree(raft::handle_t const& + handle) const +{ + auto out_degrees = compute_out_degrees(handle); + auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + out_degrees.begin(), + out_degrees.end()); + edge_t ret{}; + raft::update_host(&ret, it, 1, handle.get_stream()); + handle.get_stream_view().synchronize(); + return ret; +} + +template +weight_t +graph_view_t>:: + compute_max_in_weight_sum(raft::handle_t const& handle) const +{ + auto in_weight_sums = compute_in_weight_sums(handle); + auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + in_weight_sums.begin(), + in_weight_sums.end()); + rmm::device_scalar ret(handle.get_stream()); + device_allreduce( + handle.get_comms(), it, ret.data(), 1, raft::comms::op_t::MAX, handle.get_stream()); + return ret.value(handle.get_stream()); +} + +template +weight_t graph_view_t>::compute_max_in_weight_sum(raft::handle_t const& + handle) const +{ + auto in_weight_sums = compute_in_weight_sums(handle); + auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + in_weight_sums.begin(), + in_weight_sums.end()); + weight_t ret{}; + raft::update_host(&ret, it, 1, handle.get_stream()); + handle.get_stream_view().synchronize(); + return ret; +} + +template +weight_t +graph_view_t>:: + compute_max_out_weight_sum(raft::handle_t const& handle) const +{ + auto out_weight_sums = compute_out_weight_sums(handle); + auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + out_weight_sums.begin(), + out_weight_sums.end()); + rmm::device_scalar ret(handle.get_stream()); + device_allreduce( + handle.get_comms(), it, 
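
// The compute_max_* helpers above reduce a per-vertex degree (or weight sum) vector
// with thrust::max_element; the multi-GPU overloads then allreduce the local maximum
// with raft's op_t::MAX. A single-GPU sketch with toy degrees (types assumed):
#include <thrust/device_vector.h>
#include <thrust/extrema.h>
#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
  using edge_t = int64_t;
  std::vector<edge_t> h_degrees{2, 7, 0, 5};
  thrust::device_vector<edge_t> degrees(h_degrees.begin(), h_degrees.end());

  auto it = thrust::max_element(degrees.begin(), degrees.end());
  edge_t const max_degree = *it;  // copies the single maximum back to the host

  std::printf("max degree: %lld\n", static_cast<long long>(max_degree));  // prints 7
  return 0;
}
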
ret.data(), 1, raft::comms::op_t::MAX, handle.get_stream()); + return ret.value(handle.get_stream()); +} + +template +weight_t graph_view_t< + vertex_t, + edge_t, + weight_t, + store_transposed, + multi_gpu, + std::enable_if_t>::compute_max_out_weight_sum(raft::handle_t const& handle) const +{ + auto out_weight_sums = compute_out_weight_sums(handle); + auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + out_weight_sums.begin(), + out_weight_sums.end()); + weight_t ret{}; + raft::update_host(&ret, it, 1, handle.get_stream()); + handle.get_stream_view().synchronize(); + return ret; +} + // explicit instantiation template class graph_view_t; diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 3136515faa6..24914fb028b 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -151,7 +151,8 @@ class Louvain { protected: void initialize_dendrogram_level(vertex_t num_vertices) { - dendrogram_->add_level(current_graph_view_.get_local_vertex_first(), num_vertices); + dendrogram_->add_level( + current_graph_view_.get_local_vertex_first(), num_vertices, handle_.get_stream()); thrust::sequence(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), dendrogram_->current_level_begin(), @@ -369,8 +370,6 @@ class Louvain { current_graph_view_.get_number_of_local_vertices(), handle_.get_stream()); rmm::device_uvector src_cluster_weights_v(next_cluster_v.size(), handle_.get_stream()); - rmm::device_uvector dst_cluster_weights_v(next_cluster_v.size(), - handle_.get_stream()); compute_cluster_sum_and_subtract(old_cluster_sum_v, cluster_subtract_v); @@ -396,19 +395,9 @@ class Louvain { vertex_to_gpu_id_op, handle_.get_stream()); - dst_cluster_weights_v = cugraph::experimental::collect_values_for_keys( - handle_.get_comms(), - cluster_keys_v_.begin(), - cluster_keys_v_.end(), - cluster_weights_v_.data(), - d_dst_cluster_cache_, - d_dst_cluster_cache_ + dst_cluster_cache_v_.size(), - vertex_to_gpu_id_op, - handle_.get_stream()); - - map_key_first = d_dst_cluster_cache_; - map_key_last = d_dst_cluster_cache_ + dst_cluster_cache_v_.size(); - map_value_first = dst_cluster_weights_v.begin(); + map_key_first = cluster_keys_v_.begin(); + map_key_last = cluster_keys_v_.end(); + map_value_first = cluster_weights_v_.begin(); } else { thrust::sort_by_key(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), cluster_keys_v_.begin(), @@ -432,12 +421,21 @@ class Louvain { map_value_first = src_cluster_weights_v.begin(); } + rmm::device_uvector src_old_cluster_sum_v( + current_graph_view_.get_number_of_local_adj_matrix_partition_rows(), handle_.get_stream()); + rmm::device_uvector src_cluster_subtract_v( + current_graph_view_.get_number_of_local_adj_matrix_partition_rows(), handle_.get_stream()); + copy_to_adj_matrix_row( + handle_, current_graph_view_, old_cluster_sum_v.begin(), src_old_cluster_sum_v.begin()); + copy_to_adj_matrix_row( + handle_, current_graph_view_, cluster_subtract_v.begin(), src_cluster_subtract_v.begin()); + copy_v_transform_reduce_key_aggregated_out_nbr( handle_, current_graph_view_, - thrust::make_zip_iterator(thrust::make_tuple(old_cluster_sum_v.begin(), + thrust::make_zip_iterator(thrust::make_tuple(src_old_cluster_sum_v.begin(), d_src_vertex_weights_cache_, - cluster_subtract_v.begin(), + src_cluster_subtract_v.begin(), d_src_cluster_cache_, src_cluster_weights_v.begin())), diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index 
62bd6951f71..8d8fb0322a8 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -42,6 +42,7 @@ namespace cugraph { namespace experimental { +// FIXME: think about requiring old_new_label_pairs to be pre-shuffled template void relabel(raft::handle_t const& handle, std::tuple old_new_label_pairs, @@ -120,7 +121,12 @@ void relabel(raft::handle_t const& handle, handle.get_stream())); // cuco::static_map currently does not take stream cuco::static_map relabel_map{ - static_cast(static_cast(rx_label_pair_old_labels.size()) / load_factor), + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max( + static_cast(static_cast(rx_label_pair_old_labels.size()) / load_factor), + rx_label_pair_old_labels.size() + 1), invalid_vertex_id::value, invalid_vertex_id::value}; @@ -130,7 +136,11 @@ void relabel(raft::handle_t const& handle, [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - relabel_map.insert(pair_first, pair_first + rx_label_pair_old_labels.size()); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the + // grid size is 0; this leads to cudaErrorInvaildConfiguration. + if (rx_label_pair_old_labels.size() > 0) { + relabel_map.insert(pair_first, pair_first + rx_label_pair_old_labels.size()); + } rx_label_pair_old_labels.resize(0, handle.get_stream()); rx_label_pair_new_labels.resize(0, handle.get_stream()); @@ -152,19 +162,29 @@ void relabel(raft::handle_t const& handle, CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // cuco::static_map currently does not take stream - relabel_map.find( - rx_unique_old_labels.begin(), - rx_unique_old_labels.end(), - rx_unique_old_labels - .begin()); // now rx_unique_old_lables hold new labels for the corresponding old labels + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the + // grid size is 0; this leads to cudaErrorInvaildConfiguration. + if (rx_unique_old_labels.size() > 0) { + relabel_map.find( + rx_unique_old_labels.begin(), + rx_unique_old_labels.end(), + rx_unique_old_labels.begin()); // now rx_unique_old_lables hold new labels for the + // corresponding old labels + } std::tie(new_labels_for_unique_old_labels, std::ignore) = shuffle_values( handle.get_comms(), rx_unique_old_labels.begin(), rx_value_counts, handle.get_stream()); } } + handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream + cuco::static_map relabel_map( - static_cast(static_cast(unique_old_labels.size()) / load_factor), + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast(static_cast(unique_old_labels.size()) / load_factor), + unique_old_labels.size() + 1), invalid_vertex_id::value, invalid_vertex_id::value); @@ -175,11 +195,21 @@ void relabel(raft::handle_t const& handle, return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - relabel_map.insert(pair_first, pair_first + unique_old_labels.size()); - relabel_map.find(labels, labels + num_labels, labels); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. 
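
// A small host-side illustration of the capacity workaround used for cuco::static_map
// in this file: the requested capacity is clamped to at least num_pairs + 1 (see the
// cuCollections issues referenced in the FIXMEs), and the accompanying `if (size > 0)`
// guards skip insert()/find() entirely when there is nothing to hash, since the map
// currently launches a kernel even for an empty input. The sample sizes below are
// assumptions for illustration only; the 0.7 load factor matches the code above.
#include <algorithm>
#include <cstddef>
#include <cstdio>

int main()
{
  double constexpr load_factor = 0.7;
  for (size_t num_pairs : {0, 1, 10, 1000}) {
    auto const capacity =
      std::max(static_cast<size_t>(static_cast<double>(num_pairs) / load_factor),
               num_pairs + 1);
    std::printf("num_pairs %zu -> capacity %zu\n", num_pairs, capacity);
  }
  return 0;
}
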
+ if (unique_old_labels.size() > 0) { + relabel_map.insert(pair_first, pair_first + unique_old_labels.size()); + } + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (num_labels > 0) { relabel_map.find(labels, labels + num_labels, labels); } } else { cuco::static_map relabel_map( - static_cast(static_cast(num_label_pairs) / load_factor), + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast(static_cast(num_label_pairs) / load_factor), + static_cast(num_label_pairs) + 1), invalid_vertex_id::value, invalid_vertex_id::value); @@ -190,8 +220,12 @@ void relabel(raft::handle_t const& handle, return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - relabel_map.insert(pair_first, pair_first + num_label_pairs); - relabel_map.find(labels, labels + num_labels, labels); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (num_label_pairs > 0) { relabel_map.insert(pair_first, pair_first + num_label_pairs); } + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (num_labels > 0) { relabel_map.find(labels, labels + num_labels, labels); } } if (do_expensive_check) { diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index a8847167b87..127bd507271 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -50,62 +50,153 @@ rmm::device_uvector compute_renumber_map( raft::handle_t const& handle, vertex_t const* vertices, vertex_t num_local_vertices /* relevant only if vertices != nullptr */, - vertex_t const* edgelist_major_vertices, - vertex_t const* edgelist_minor_vertices, - edge_t num_edgelist_edges) + std::vector const& edgelist_major_vertices, + std::vector const& edgelist_minor_vertices, + std::vector const& edgelist_edge_counts) { // FIXME: compare this sort based approach with hash based approach in both speed and memory // footprint // 1. 
acquire (unique major label, count) pairs - rmm::device_uvector tmp_labels(num_edgelist_edges, handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edgelist_major_vertices, - edgelist_major_vertices + num_edgelist_edges, - tmp_labels.begin()); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - tmp_labels.begin(), - tmp_labels.end()); - rmm::device_uvector major_labels(tmp_labels.size(), handle.get_stream()); - rmm::device_uvector major_counts(major_labels.size(), handle.get_stream()); - auto major_pair_it = - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - tmp_labels.begin(), - tmp_labels.end(), - thrust::make_constant_iterator(edge_t{1}), - major_labels.begin(), - major_counts.begin()); - tmp_labels.resize(0, handle.get_stream()); - tmp_labels.shrink_to_fit(handle.get_stream()); - major_labels.resize(thrust::distance(major_labels.begin(), thrust::get<0>(major_pair_it)), - handle.get_stream()); - major_counts.resize(major_labels.size(), handle.get_stream()); - major_labels.shrink_to_fit(handle.get_stream()); - major_counts.shrink_to_fit(handle.get_stream()); + rmm::device_uvector major_labels(0, handle.get_stream()); + rmm::device_uvector major_counts(0, handle.get_stream()); + for (size_t i = 0; i < edgelist_major_vertices.size(); ++i) { + rmm::device_uvector tmp_major_labels(0, handle.get_stream()); + rmm::device_uvector tmp_major_counts(0, handle.get_stream()); + { + rmm::device_uvector sorted_major_labels(edgelist_edge_counts[i], + handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_major_vertices[i], + edgelist_major_vertices[i] + edgelist_edge_counts[i], + sorted_major_labels.begin()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_major_labels.begin(), + sorted_major_labels.end()); + auto num_unique_labels = + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(sorted_major_labels.size()), + [labels = sorted_major_labels.data()] __device__(auto i) { + return (i == 0) || (labels[i - 1] != labels[i]); + }); + tmp_major_labels.resize(num_unique_labels, handle.get_stream()); + tmp_major_counts.resize(tmp_major_labels.size(), handle.get_stream()); + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_major_labels.begin(), + sorted_major_labels.end(), + thrust::make_constant_iterator(edge_t{1}), + tmp_major_labels.begin(), + tmp_major_counts.begin()); + } + + if (multi_gpu) { + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + + rmm::device_uvector rx_major_labels(0, handle.get_stream()); + rmm::device_uvector rx_major_counts(0, handle.get_stream()); + auto rx_sizes = host_scalar_gather( + col_comm, tmp_major_labels.size(), static_cast(i), handle.get_stream()); + std::vector rx_displs{}; + if (static_cast(i) == col_comm_rank) { + rx_displs.assign(col_comm_size, size_t{0}); + std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1); + rx_major_labels.resize(rx_displs.back() + rx_sizes.back(), handle.get_stream()); + rx_major_counts.resize(rx_major_labels.size(), handle.get_stream()); + } + device_gatherv(col_comm, + thrust::make_zip_iterator( + 
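
// compute_renumber_map above sizes its per-partition output by counting, in an
// already-sorted label array, the positions that start a new run of equal values:
// (i == 0) || (labels[i - 1] != labels[i]). A standalone sketch of that idiom with toy
// data (it uses a __device__ lambda, so compile with nvcc and --extended-lambda):
#include <thrust/count.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/iterator/counting_iterator.h>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
  using vertex_t = int32_t;
  std::vector<vertex_t> h_sorted{1, 1, 2, 5, 5, 5, 9};
  thrust::device_vector<vertex_t> sorted_labels(h_sorted.begin(), h_sorted.end());

  auto num_unique = thrust::count_if(
    thrust::device,
    thrust::make_counting_iterator(size_t{0}),
    thrust::make_counting_iterator(sorted_labels.size()),
    [labels = thrust::raw_pointer_cast(sorted_labels.data())] __device__(size_t i) {
      return (i == 0) || (labels[i - 1] != labels[i]);
    });

  std::printf("unique labels: %ld\n", static_cast<long>(num_unique));  // prints 4
  return 0;
}
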
thrust::make_tuple(tmp_major_labels.begin(), tmp_major_counts.begin())), + thrust::make_zip_iterator( + thrust::make_tuple(rx_major_labels.begin(), rx_major_counts.begin())), + tmp_major_labels.size(), + rx_sizes, + rx_displs, + static_cast(i), + handle.get_stream()); + if (static_cast(i) == col_comm_rank) { + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_major_labels.begin(), + rx_major_labels.end(), + rx_major_counts.begin()); + major_labels.resize(rx_major_labels.size(), handle.get_stream()); + major_counts.resize(major_labels.size(), handle.get_stream()); + auto pair_it = + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_major_labels.begin(), + rx_major_labels.end(), + rx_major_counts.begin(), + major_labels.begin(), + major_counts.begin()); + major_labels.resize(thrust::distance(major_labels.begin(), thrust::get<0>(pair_it)), + handle.get_stream()); + major_counts.resize(major_labels.size(), handle.get_stream()); + major_labels.shrink_to_fit(handle.get_stream()); + major_counts.shrink_to_fit(handle.get_stream()); + } + } else { + tmp_major_labels.shrink_to_fit(handle.get_stream()); + tmp_major_counts.shrink_to_fit(handle.get_stream()); + major_labels = std::move(tmp_major_labels); + major_counts = std::move(tmp_major_counts); + } + } // 2. acquire unique minor labels - rmm::device_uvector minor_labels(num_edgelist_edges, handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edgelist_minor_vertices, - edgelist_minor_vertices + num_edgelist_edges, - minor_labels.begin()); + std::vector minor_displs(edgelist_minor_vertices.size(), edge_t{0}); + std::partial_sum( + edgelist_edge_counts.begin(), edgelist_edge_counts.end() - 1, minor_displs.begin() + 1); + rmm::device_uvector minor_labels(minor_displs.back() + edgelist_edge_counts.back(), + handle.get_stream()); + for (size_t i = 0; i < edgelist_minor_vertices.size(); ++i) { + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_minor_vertices[i], + edgelist_minor_vertices[i] + edgelist_edge_counts[i], + minor_labels.begin() + minor_displs[i]); + } thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), minor_labels.begin(), minor_labels.end()); - auto minor_label_it = - thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - minor_labels.begin(), - minor_labels.end()); - minor_labels.resize(thrust::distance(minor_labels.begin(), minor_label_it), handle.get_stream()); + minor_labels.resize( + thrust::distance(minor_labels.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + minor_labels.begin(), + minor_labels.end())), + handle.get_stream()); + if (multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + + rmm::device_uvector rx_minor_labels(0, handle.get_stream()); + std::tie(rx_minor_labels, std::ignore) = groupby_gpuid_and_shuffle_values( + row_comm, + minor_labels.begin(), + minor_labels.end(), + [key_func = detail::compute_gpu_id_from_vertex_t{row_comm_size}] __device__( + auto val) { return key_func(val); }, + handle.get_stream()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_minor_labels.begin(), + rx_minor_labels.end()); + rx_minor_labels.resize( + thrust::distance( + rx_minor_labels.begin(), + 
thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_minor_labels.begin(), + rx_minor_labels.end())), + handle.get_stream()); + minor_labels = std::move(rx_minor_labels); + } minor_labels.shrink_to_fit(handle.get_stream()); // 3. merge major and minor labels and vertex labels rmm::device_uvector merged_labels(major_labels.size() + minor_labels.size(), handle.get_stream()); - rmm::device_uvector merged_counts(merged_labels.size(), handle.get_stream()); thrust::merge_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), major_labels.begin(), @@ -142,47 +233,7 @@ rmm::device_uvector compute_renumber_map( labels.shrink_to_fit(handle.get_stream()); counts.shrink_to_fit(handle.get_stream()); - // 4. if multi-GPU, shuffle and reduce (label, count) pairs - - if (multi_gpu) { - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - - auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(labels.begin(), counts.begin())); - rmm::device_uvector rx_labels(0, handle.get_stream()); - rmm::device_uvector rx_counts(0, handle.get_stream()); - std::forward_as_tuple(std::tie(rx_labels, rx_counts), std::ignore) = - groupby_gpuid_and_shuffle_values( - comm, - pair_first, - pair_first + labels.size(), - [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__( - auto val) { return key_func(thrust::get<0>(val)); }, - handle.get_stream()); - - labels.resize(rx_labels.size(), handle.get_stream()); - counts.resize(labels.size(), handle.get_stream()); - thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_labels.begin(), - rx_labels.end(), - rx_counts.begin()); - pair_it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_labels.begin(), - rx_labels.end(), - rx_counts.begin(), - labels.begin(), - counts.begin()); - rx_labels.resize(0, handle.get_stream()); - rx_counts.resize(0, handle.get_stream()); - rx_labels.shrink_to_fit(handle.get_stream()); - rx_counts.shrink_to_fit(handle.get_stream()); - labels.resize(thrust::distance(labels.begin(), thrust::get<0>(pair_it)), handle.get_stream()); - counts.resize(labels.size(), handle.get_stream()); - labels.shrink_to_fit(handle.get_stream()); - labels.shrink_to_fit(handle.get_stream()); - } - - // 5. if vertices != nullptr, add isolated vertices + // 4. if vertices != nullptr, add isolated vertices rmm::device_uvector isolated_vertices(0, handle.get_stream()); if (vertices != nullptr) { @@ -232,10 +283,9 @@ void expensive_check_edgelist( raft::handle_t const& handle, vertex_t const* local_vertices, vertex_t num_local_vertices /* relevant only if local_vertices != nullptr */, - vertex_t const* edgelist_major_vertices, - vertex_t const* edgelist_minor_vertices, - edge_t num_edgelist_edges, - bool is_hypergraph_partitioned /* relevant only if multi_gpu == true */) + std::vector const& edgelist_major_vertices, + std::vector const& edgelist_minor_vertices, + std::vector const& edgelist_edge_counts) { rmm::device_uvector sorted_local_vertices( local_vertices != nullptr ? 
num_local_vertices : vertex_t{0}, handle.get_stream()); @@ -246,6 +296,12 @@ void expensive_check_edgelist( thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), sorted_local_vertices.begin(), sorted_local_vertices.end()); + CUGRAPH_EXPECTS(static_cast(thrust::distance( + sorted_local_vertices.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_local_vertices.begin(), + sorted_local_vertices.end()))) == sorted_local_vertices.size(), + "Invalid input argument: local_vertices should not have duplicates."); if (multi_gpu) { auto& comm = handle.get_comms(); @@ -253,8 +309,15 @@ void expensive_check_edgelist( auto const comm_rank = comm.get_rank(); auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_size = row_comm.get_size(); + auto const row_comm_rank = row_comm.get_rank(); auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_size = col_comm.get_size(); + auto const col_comm_rank = col_comm.get_rank(); + + CUGRAPH_EXPECTS((edgelist_major_vertices.size() == edgelist_minor_vertices.size()) && + (edgelist_major_vertices.size() == static_cast(col_comm_size)), + "Invalid input argument: both edgelist_major_vertices.size() & " + "edgelist_minor_vertices.size() should coincide with col_comm_size."); CUGRAPH_EXPECTS( thrust::count_if( @@ -268,95 +331,127 @@ void expensive_check_edgelist( }) == 0, "Invalid input argument: local_vertices should be pre-shuffled."); - auto edge_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices)); - CUGRAPH_EXPECTS( - thrust::count_if( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edge_first, - edge_first + num_edgelist_edges, - [comm_rank, - key_func = - detail::compute_gpu_id_from_edge_t{is_hypergraph_partitioned, - comm_size, - row_comm_size, - col_comm_size}] __device__(auto edge) { - return key_func(thrust::get<0>(edge), thrust::get<1>(edge)) != comm_rank; - }) == 0, - "Invalid input argument: edgelist_major_vertices & edgelist_minor_vertices should be " - "pre-shuffled."); - - if (local_vertices != nullptr) { - rmm::device_uvector unique_edge_vertices(num_edgelist_edges * 2, - handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edgelist_major_vertices, - edgelist_major_vertices + num_edgelist_edges, - unique_edge_vertices.begin()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edgelist_minor_vertices, - edgelist_minor_vertices + num_edgelist_edges, - unique_edge_vertices.begin() + num_edgelist_edges); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - unique_edge_vertices.begin(), - unique_edge_vertices.end()); - unique_edge_vertices.resize( - thrust::distance( - unique_edge_vertices.begin(), - thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - unique_edge_vertices.begin(), - unique_edge_vertices.end())), - handle.get_stream()); - - rmm::device_uvector rx_unique_edge_vertices(0, handle.get_stream()); - std::tie(rx_unique_edge_vertices, std::ignore) = groupby_gpuid_and_shuffle_values( - handle.get_comms(), - unique_edge_vertices.begin(), - unique_edge_vertices.end(), - [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__( - auto val) { return key_func(val); }, - handle.get_stream()); - - unique_edge_vertices = std::move(rx_unique_edge_vertices); - + for 
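
// The new duplicate check above uses a standard idiom: after sorting, a range has no
// duplicates exactly when thrust::unique leaves every element in place, i.e. the
// distance from begin() to the iterator unique() returns equals the original size.
// Standalone sketch with toy data:
#include <thrust/device_vector.h>
#include <thrust/distance.h>
#include <thrust/sort.h>
#include <thrust/unique.h>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
  using vertex_t = int32_t;
  std::vector<vertex_t> h_vertices{4, 1, 3, 1};  // '1' appears twice
  thrust::device_vector<vertex_t> vertices(h_vertices.begin(), h_vertices.end());

  thrust::sort(vertices.begin(), vertices.end());
  auto const num_unique = static_cast<size_t>(thrust::distance(
    vertices.begin(), thrust::unique(vertices.begin(), vertices.end())));
  bool const has_duplicates = num_unique != vertices.size();

  std::printf("%s\n", has_duplicates ? "duplicates found" : "no duplicates");
  return 0;
}
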
(size_t i = 0; i < edgelist_major_vertices.size(); ++i) { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices[i], edgelist_minor_vertices[i])); CUGRAPH_EXPECTS( thrust::count_if( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - unique_edge_vertices.begin(), - unique_edge_vertices.end(), - [num_local_vertices, - sorted_local_vertices = sorted_local_vertices.data()] __device__(auto v) { - return !thrust::binary_search( - thrust::seq, sorted_local_vertices, sorted_local_vertices + num_local_vertices, v); + edge_first, + edge_first + edgelist_edge_counts[i], + [comm_size, + comm_rank, + row_comm_rank, + col_comm_size, + col_comm_rank, + i, + gpu_id_key_func = + detail::compute_gpu_id_from_edge_t{comm_size, row_comm_size, col_comm_size}, + partition_id_key_func = + detail::compute_partition_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto edge) { + return (gpu_id_key_func(thrust::get<0>(edge), thrust::get<1>(edge)) != comm_rank) || + (partition_id_key_func(thrust::get<0>(edge), thrust::get<1>(edge)) != + row_comm_rank * col_comm_size + col_comm_rank + i * comm_size); }) == 0, - "Invalid input argument: edgelist_major_vertices and/or edgelist_minor_vertices have " - "invalid vertex ID(s)."); + "Invalid input argument: edgelist_major_vertices & edgelist_minor_vertices should be " + "pre-shuffled."); + + auto aggregate_vertexlist_size = host_scalar_allreduce( + comm, + local_vertices != nullptr ? num_local_vertices : vertex_t{0}, + handle.get_stream()); // local_vertices != nullptr is insufficient in multi-GPU as only a + // subset of GPUs may have a non-zero vertices + if (aggregate_vertexlist_size > 0) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + + rmm::device_uvector sorted_major_vertices(0, handle.get_stream()); + { + auto recvcounts = + host_scalar_allgather(col_comm, sorted_local_vertices.size(), handle.get_stream()); + std::vector displacements(recvcounts.size(), size_t{0}); + std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); + sorted_major_vertices.resize(displacements.back() + recvcounts.back(), + handle.get_stream()); + device_allgatherv(col_comm, + sorted_local_vertices.data(), + sorted_major_vertices.data(), + recvcounts, + displacements, + handle.get_stream()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_major_vertices.begin(), + sorted_major_vertices.end()); + } + + rmm::device_uvector sorted_minor_vertices(0, handle.get_stream()); + { + auto recvcounts = + host_scalar_allgather(row_comm, sorted_local_vertices.size(), handle.get_stream()); + std::vector displacements(recvcounts.size(), size_t{0}); + std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); + sorted_minor_vertices.resize(displacements.back() + recvcounts.back(), + handle.get_stream()); + device_allgatherv(row_comm, + sorted_local_vertices.data(), + sorted_minor_vertices.data(), + recvcounts, + displacements, + handle.get_stream()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_minor_vertices.begin(), + sorted_minor_vertices.end()); + } + + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices[i], edgelist_minor_vertices[i])); + CUGRAPH_EXPECTS( + thrust::count_if( + 
rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + edgelist_edge_counts[i], + [num_major_vertices = static_cast(sorted_major_vertices.size()), + sorted_major_vertices = sorted_major_vertices.data(), + num_minor_vertices = static_cast(sorted_minor_vertices.size()), + sorted_minor_vertices = sorted_minor_vertices.data()] __device__(auto e) { + return !thrust::binary_search(thrust::seq, + sorted_major_vertices, + sorted_major_vertices + num_major_vertices, + thrust::get<0>(e)) || + !thrust::binary_search(thrust::seq, + sorted_minor_vertices, + sorted_minor_vertices + num_minor_vertices, + thrust::get<1>(e)); + }) == 0, + "Invalid input argument: edgelist_major_vertices and/or edgelist_mior_vertices have " + "invalid vertex ID(s)."); + } } } else { - if (local_vertices != nullptr) { - CUGRAPH_EXPECTS( - thrust::count_if( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edgelist_major_vertices, - edgelist_major_vertices + num_edgelist_edges, - [num_local_vertices, - sorted_local_vertices = sorted_local_vertices.data()] __device__(auto v) { - return !thrust::binary_search( - thrust::seq, sorted_local_vertices, sorted_local_vertices + num_local_vertices, v); - }) == 0, - "Invalid input argument: edgelist_major_vertices has invalid vertex ID(s)."); + assert(edgelist_major_vertices.size() == 1); + assert(edgelist_minor_vertices.size() == 1); + if (local_vertices != nullptr) { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices[0], edgelist_minor_vertices[0])); CUGRAPH_EXPECTS( - thrust::count_if( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edgelist_major_vertices, - edgelist_major_vertices + num_edgelist_edges, - [num_local_vertices, - sorted_local_vertices = sorted_local_vertices.data()] __device__(auto v) { - return !thrust::binary_search( - thrust::seq, sorted_local_vertices, sorted_local_vertices + num_local_vertices, v); - }) == 0, - "Invalid input argument: edgelist_major_vertices has invalid vertex ID(s)."); + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + edgelist_edge_counts[0], + [num_local_vertices, + sorted_local_vertices = sorted_local_vertices.data()] __device__(auto e) { + return !thrust::binary_search(thrust::seq, + sorted_local_vertices, + sorted_local_vertices + num_local_vertices, + thrust::get<0>(e)) || + !thrust::binary_search(thrust::seq, + sorted_local_vertices, + sorted_local_vertices + num_local_vertices, + thrust::get<1>(e)); + }) == 0, + "Invalid input argument: edgelist_major_vertices and/or edgelist_minor_vertices have " + "invalid vertex ID(s)."); } } } @@ -368,15 +463,15 @@ std::enable_if_t const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, bool do_expensive_check) { // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "Relabel not supported on Pascal and older architectures."); + CUGRAPH_EXPECTS( + handle.get_device_properties().major >= 7, + "This version of enumber_edgelist not supported on Pascal and older architectures."); #ifdef CUCO_STATIC_MAP_DEFINED auto& comm = handle.get_comms(); @@ -389,14 +484,20 @@ renumber_edgelist(raft::handle_t const& handle, auto const col_comm_size = col_comm.get_size(); auto const col_comm_rank = col_comm.get_rank(); + std::vector 
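
// The endpoint validation above asks, for every edge, whether both endpoints appear in
// the appropriate sorted vertex list (thrust::binary_search inside the count_if
// predicate). A simplified single-list sketch using thrust's vectorized binary_search
// overload instead (toy data; not the cuGraph check itself):
#include <thrust/binary_search.h>
#include <thrust/count.h>
#include <thrust/device_vector.h>
#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
  using vertex_t = int32_t;
  std::vector<vertex_t> h_sorted_vertices{0, 1, 2, 3};
  std::vector<vertex_t> h_endpoints{1, 3, 7};  // 7 is not a valid vertex

  thrust::device_vector<vertex_t> sorted_vertices(h_sorted_vertices.begin(),
                                                  h_sorted_vertices.end());
  thrust::device_vector<vertex_t> endpoints(h_endpoints.begin(), h_endpoints.end());
  thrust::device_vector<bool> found(endpoints.size());

  // found[k] == true iff endpoints[k] occurs in the sorted vertex list
  thrust::binary_search(sorted_vertices.begin(),
                        sorted_vertices.end(),
                        endpoints.begin(),
                        endpoints.end(),
                        found.begin());

  auto const num_invalid = thrust::count(found.begin(), found.end(), false);
  std::printf("invalid endpoints: %ld\n", static_cast<long>(num_invalid));  // prints 1
  return 0;
}
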
edgelist_const_major_vertices(edgelist_major_vertices.size()); + std::vector edgelist_const_minor_vertices(edgelist_const_major_vertices.size()); + for (size_t i = 0; i < edgelist_const_major_vertices.size(); ++i) { + edgelist_const_major_vertices[i] = edgelist_major_vertices[i]; + edgelist_const_minor_vertices[i] = edgelist_minor_vertices[i]; + } + if (do_expensive_check) { expensive_check_edgelist(handle, local_vertices, num_local_vertices, - edgelist_major_vertices, - edgelist_minor_vertices, - num_edgelist_edges, - is_hypergraph_partitioned); + edgelist_const_major_vertices, + edgelist_const_minor_vertices, + edgelist_edge_counts); } // 1. compute renumber map @@ -405,142 +506,129 @@ renumber_edgelist(raft::handle_t const& handle, detail::compute_renumber_map(handle, local_vertices, num_local_vertices, - edgelist_major_vertices, - edgelist_minor_vertices, - num_edgelist_edges); + edgelist_const_major_vertices, + edgelist_const_minor_vertices, + edgelist_edge_counts); // 2. initialize partition_t object, number_of_vertices, and number_of_edges for the coarsened // graph - auto vertex_partition_counts = host_scalar_allgather( + auto vertex_counts = host_scalar_allgather( comm, static_cast(renumber_map_labels.size()), handle.get_stream()); std::vector vertex_partition_offsets(comm_size + 1, 0); - std::partial_sum(vertex_partition_counts.begin(), - vertex_partition_counts.end(), - vertex_partition_offsets.begin() + 1); + std::partial_sum( + vertex_counts.begin(), vertex_counts.end(), vertex_partition_offsets.begin() + 1); - partition_t partition(vertex_partition_offsets, - is_hypergraph_partitioned, - row_comm_size, - col_comm_size, - row_comm_rank, - col_comm_rank); + partition_t partition( + vertex_partition_offsets, row_comm_size, col_comm_size, row_comm_rank, col_comm_rank); auto number_of_vertices = vertex_partition_offsets.back(); - auto number_of_edges = host_scalar_allreduce(comm, num_edgelist_edges, handle.get_stream()); + auto number_of_edges = host_scalar_allreduce( + comm, + std::accumulate(edgelist_edge_counts.begin(), edgelist_edge_counts.end(), edge_t{0}), + handle.get_stream()); // 3. 
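The "2. initialize partition_t ..." step above turns the allgathered per-GPU vertex counts into exclusive partition offsets and sums the per-partition edge counts before the allreduce. The snippet below is a host-only illustration of those two std::partial_sum / std::accumulate calls with made-up counts; the communicator calls themselves are omitted.

#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

int main()
{
  // hypothetical per-GPU vertex counts as returned by host_scalar_allgather()
  std::vector<int32_t> vertex_counts{5, 3, 4, 4};
  std::vector<int32_t> vertex_partition_offsets(vertex_counts.size() + 1, 0);
  std::partial_sum(
    vertex_counts.begin(), vertex_counts.end(), vertex_partition_offsets.begin() + 1);
  // offsets are now 0, 5, 8, 12, 16; the last entry is the global vertex count
  std::cout << "number_of_vertices = " << vertex_partition_offsets.back() << "\n";

  // hypothetical per-local-partition edge counts; their sum is this GPU's
  // contribution to the number_of_edges allreduce
  std::vector<int64_t> edgelist_edge_counts{10, 7};
  auto local_edges =
    std::accumulate(edgelist_edge_counts.begin(), edgelist_edge_counts.end(), int64_t{0});
  std::cout << "local edge count = " << local_edges << "\n";  // prints 17
  return 0;
}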
renumber edges - if (is_hypergraph_partitioned) { - CUGRAPH_FAIL("unimplemented."); - } else { - double constexpr load_factor = 0.7; + double constexpr load_factor = 0.7; - // FIXME: compare this hash based approach with a binary search based approach in both memory - // footprint and execution time + // FIXME: compare this hash based approach with a binary search based approach in both memory + // footprint and execution time - { - vertex_t major_first{}; - vertex_t major_last{}; - std::tie(major_first, major_last) = partition.get_matrix_partition_major_range(0); - rmm::device_uvector renumber_map_major_labels(major_last - major_first, - handle.get_stream()); - std::vector recvcounts(row_comm_size); - for (int i = 0; i < row_comm_size; ++i) { - recvcounts[i] = partition.get_vertex_partition_size(col_comm_rank * row_comm_size + i); - } - std::vector displacements(row_comm_size, 0); - std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); - device_allgatherv(row_comm, - renumber_map_labels.begin(), - renumber_map_major_labels.begin(), - recvcounts, - displacements, - handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // cuco::static_map currently does not take stream - - cuco::static_map renumber_map{ - static_cast(static_cast(renumber_map_major_labels.size()) / load_factor), - invalid_vertex_id::value, - invalid_vertex_id::value}; - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple(renumber_map_major_labels.begin(), - thrust::make_counting_iterator(major_first))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - renumber_map.insert(pair_first, pair_first + renumber_map_major_labels.size()); - renumber_map.find(edgelist_major_vertices, - edgelist_major_vertices + num_edgelist_edges, - edgelist_major_vertices); + for (size_t i = 0; i < edgelist_major_vertices.size(); ++i) { + rmm::device_uvector renumber_map_major_labels( + col_comm_rank == static_cast(i) ? vertex_t{0} + : partition.get_matrix_partition_major_size(i), + handle.get_stream()); + device_bcast(col_comm, + renumber_map_labels.data(), + renumber_map_major_labels.data(), + partition.get_matrix_partition_major_size(i), + i, + handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream + + cuco::static_map renumber_map{ + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast( + static_cast(partition.get_matrix_partition_major_size(i)) / load_factor), + static_cast(partition.get_matrix_partition_major_size(i)) + 1), + invalid_vertex_id::value, + invalid_vertex_id::value}; + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple( + col_comm_rank == static_cast(i) ? renumber_map_labels.begin() + : renumber_map_major_labels.begin(), + thrust::make_counting_iterator(partition.get_matrix_partition_major_first(i)))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. 
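The FIXME above sizes each cuco::static_map as max(num_keys / 0.7, num_keys + 1): the 0.7 load factor sets the nominal capacity, and the num_keys + 1 floor is the temporary workaround for cuCollections issues #72/#73. A tiny host helper showing the same arithmetic:

#include <algorithm>
#include <cstddef>
#include <iostream>

// capacity rule used for the cuco::static_map objects above
size_t map_capacity(size_t num_keys, double load_factor = 0.7)
{
  return std::max(static_cast<size_t>(static_cast<double>(num_keys) / load_factor),
                  num_keys + 1);
}

int main()
{
  std::cout << map_capacity(0) << "\n";     // 1, so even an empty map gets a valid capacity
  std::cout << map_capacity(1000) << "\n";  // 1428
  return 0;
}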
+ if (partition.get_matrix_partition_major_size(i) > 0) { + renumber_map.insert(pair_first, pair_first + partition.get_matrix_partition_major_size(i)); } + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (edgelist_edge_counts[i]) { + renumber_map.find(edgelist_major_vertices[i], + edgelist_major_vertices[i] + edgelist_edge_counts[i], + edgelist_major_vertices[i]); + } + } - { - vertex_t minor_first{}; - vertex_t minor_last{}; - std::tie(minor_first, minor_last) = partition.get_matrix_partition_minor_range(); - rmm::device_uvector renumber_map_minor_labels(minor_last - minor_first, - handle.get_stream()); - - // FIXME: this P2P is unnecessary if we apply the partitioning scheme used with hypergraph - // partitioning - auto comm_src_rank = row_comm_rank * col_comm_size + col_comm_rank; - auto comm_dst_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; - // FIXME: this branch may be no longer necessary with NCCL backend - if (comm_src_rank == comm_rank) { - assert(comm_dst_rank == comm_rank); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - renumber_map_labels.begin(), - renumber_map_labels.end(), - renumber_map_minor_labels.begin() + - (partition.get_vertex_partition_first(comm_src_rank) - - partition.get_vertex_partition_first(row_comm_rank * col_comm_size))); - } else { - device_sendrecv(comm, - renumber_map_labels.begin(), - renumber_map_labels.size(), - comm_dst_rank, - renumber_map_minor_labels.begin() + - (partition.get_vertex_partition_first(comm_src_rank) - - partition.get_vertex_partition_first(row_comm_rank * col_comm_size)), - static_cast(partition.get_vertex_partition_size(comm_src_rank)), - comm_src_rank, - handle.get_stream()); - } - - // FIXME: these broadcast operations can be placed between ncclGroupStart() and - // ncclGroupEnd() - for (int i = 0; i < col_comm_size; ++i) { - auto offset = partition.get_vertex_partition_first(row_comm_rank * col_comm_size + i) - - partition.get_vertex_partition_first(row_comm_rank * col_comm_size); - auto count = partition.get_vertex_partition_size(row_comm_rank * col_comm_size + i); - device_bcast(col_comm, - renumber_map_minor_labels.begin() + offset, - renumber_map_minor_labels.begin() + offset, - count, - i, - handle.get_stream()); - } + { + rmm::device_uvector renumber_map_minor_labels( + partition.get_matrix_partition_minor_size(), handle.get_stream()); + std::vector recvcounts(row_comm_size); + for (int i = 0; i < row_comm_size; ++i) { + recvcounts[i] = partition.get_vertex_partition_size(col_comm_rank * row_comm_size + i); + } + std::vector displacements(recvcounts.size(), 0); + std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); + device_allgatherv(row_comm, + renumber_map_labels.begin(), + renumber_map_minor_labels.begin(), + recvcounts, + displacements, + handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // cuco::static_map currently does not take stream + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream - cuco::static_map renumber_map{ + cuco::static_map renumber_map{ + // FIXME: std::max(..., ...) 
as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max( static_cast(static_cast(renumber_map_minor_labels.size()) / load_factor), - invalid_vertex_id::value, - invalid_vertex_id::value}; - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple(renumber_map_minor_labels.begin(), - thrust::make_counting_iterator(minor_first))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + renumber_map_minor_labels.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value}; + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple( + renumber_map_minor_labels.begin(), + thrust::make_counting_iterator(partition.get_matrix_partition_minor_first()))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (renumber_map_minor_labels.size()) { renumber_map.insert(pair_first, pair_first + renumber_map_minor_labels.size()); - renumber_map.find(edgelist_minor_vertices, - edgelist_minor_vertices + num_edgelist_edges, - edgelist_minor_vertices); + } + for (size_t i = 0; i < edgelist_major_vertices.size(); ++i) { + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the + // grid size is 0; this leads to cudaErrorInvaildConfiguration. + if (edgelist_edge_counts[i]) { + renumber_map.find(edgelist_minor_vertices[i], + edgelist_minor_vertices[i] + edgelist_edge_counts[i], + edgelist_minor_vertices[i]); + } } } @@ -565,27 +653,28 @@ std::enable_if_t> renumber_edgelist( bool do_expensive_check) { // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "Relabel not supported on Pascal and older architectures."); + CUGRAPH_EXPECTS( + handle.get_device_properties().major >= 7, + "This version of renumber_edgelist not supported on Pascal and older architectures."); #ifdef CUCO_STATIC_MAP_DEFINED if (do_expensive_check) { - expensive_check_edgelist(handle, - vertices, - num_vertices, - edgelist_major_vertices, - edgelist_minor_vertices, - num_edgelist_edges, - false); + expensive_check_edgelist( + handle, + vertices, + num_vertices, + std::vector{edgelist_major_vertices}, + std::vector{edgelist_minor_vertices}, + std::vector{num_edgelist_edges}); } - auto renumber_map_labels = - detail::compute_renumber_map(handle, - vertices, - num_vertices, - edgelist_major_vertices, - edgelist_minor_vertices, - num_edgelist_edges); + auto renumber_map_labels = detail::compute_renumber_map( + handle, + vertices, + num_vertices, + std::vector{edgelist_major_vertices}, + std::vector{edgelist_minor_vertices}, + std::vector{num_edgelist_edges}); double constexpr load_factor = 0.7; @@ -593,7 +682,11 @@ std::enable_if_t> renumber_edgelist( // footprint and execution time cuco::static_map renumber_map{ - static_cast(static_cast(renumber_map_labels.size()) / load_factor), + // FIXME: std::max(..., ...) 
as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast(static_cast(renumber_map_labels.size()) / load_factor), + renumber_map_labels.size() + 1), invalid_vertex_id::value, invalid_vertex_id::value}; auto pair_first = thrust::make_transform_iterator( @@ -602,11 +695,21 @@ std::enable_if_t> renumber_edgelist( [] __device__(auto val) { return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); }); - renumber_map.insert(pair_first, pair_first + renumber_map_labels.size()); - renumber_map.find( - edgelist_major_vertices, edgelist_major_vertices + num_edgelist_edges, edgelist_major_vertices); - renumber_map.find( - edgelist_minor_vertices, edgelist_minor_vertices + num_edgelist_edges, edgelist_minor_vertices); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (renumber_map_labels.size()) { + renumber_map.insert(pair_first, pair_first + renumber_map_labels.size()); + } + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (num_edgelist_edges > 0) { + renumber_map.find(edgelist_major_vertices, + edgelist_major_vertices + num_edgelist_edges, + edgelist_major_vertices); + renumber_map.find(edgelist_minor_vertices, + edgelist_minor_vertices + num_edgelist_edges, + edgelist_minor_vertices); + } return renumber_map_labels; #else @@ -620,22 +723,21 @@ template std::enable_if_t, partition_t, vertex_t, edge_t>> renumber_edgelist(raft::handle_t const& handle, - vertex_t* edgelist_major_vertices /* [INOUT] */, - vertex_t* edgelist_minor_vertices /* [INOUT] */, - edge_t num_edgelist_edges, - bool is_hypergraph_partitioned, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, bool do_expensive_check) { // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "Relabel not supported on Pascal and older architectures."); + CUGRAPH_EXPECTS( + handle.get_device_properties().major >= 7, + "This version of renumber_edgelist not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, static_cast(nullptr), vertex_t{0}, edgelist_major_vertices, edgelist_minor_vertices, - num_edgelist_edges, - is_hypergraph_partitioned, + edgelist_edge_counts, do_expensive_check); } @@ -648,8 +750,9 @@ std::enable_if_t> renumber_edgelist( bool do_expensive_check) { // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "Relabel not supported on Pascal and older architectures."); + CUGRAPH_EXPECTS( + handle.get_device_properties().major >= 7, + "This version of renumber_edgelist not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, static_cast(nullptr), vertex_t{0} /* dummy */, @@ -665,22 +768,21 @@ std::enable_if_t const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, bool do_expensive_check) { // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "Relabel not supported on Pascal and older architectures."); + CUGRAPH_EXPECTS( + 
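In the single-process path just above, the map stores (renumber_map_labels[i], i) pairs and find() overwrites the major/minor arrays in place with internal ids. A minimal host analogue using std::unordered_map in place of cuco::static_map, with hypothetical labels:

#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <vector>

int main()
{
  // hypothetical renumber map: external label -> internal id == its position in the array
  std::vector<int32_t> renumber_map_labels{42, 7, 19, 3};
  std::unordered_map<int32_t, int32_t> renumber_map{};
  for (int32_t i = 0; i < static_cast<int32_t>(renumber_map_labels.size()); ++i) {
    renumber_map.emplace(renumber_map_labels[i], i);
  }

  // renumber an edge list in place, mirroring renumber_map.find(major, major + n, major)
  std::vector<int32_t> edgelist_major_vertices{42, 3, 7};
  std::vector<int32_t> edgelist_minor_vertices{7, 42, 19};
  for (size_t i = 0; i < edgelist_major_vertices.size(); ++i) {
    edgelist_major_vertices[i] = renumber_map.at(edgelist_major_vertices[i]);
    edgelist_minor_vertices[i] = renumber_map.at(edgelist_minor_vertices[i]);
  }
  for (size_t i = 0; i < edgelist_major_vertices.size(); ++i) {
    std::cout << edgelist_major_vertices[i] << " -> " << edgelist_minor_vertices[i] << "\n";
  }
  // prints 0 -> 1, 3 -> 0, 1 -> 2
  return 0;
}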
handle.get_device_properties().major >= 7, + "This version of renumber_edgelist not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, local_vertices, num_local_vertices, edgelist_major_vertices, edgelist_minor_vertices, - num_edgelist_edges, - is_hypergraph_partitioned, + edgelist_edge_counts, do_expensive_check); } @@ -695,8 +797,9 @@ std::enable_if_t> renumber_edgelist( bool do_expensive_check) { // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "Relabel not supported on Pascal and older architectures."); + CUGRAPH_EXPECTS( + handle.get_device_properties().major >= 7, + "This version of renumber_edgelist not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, vertices, num_vertices, @@ -711,12 +814,12 @@ std::enable_if_t> renumber_edgelist( // instantiations for // template std::tuple, partition_t, int32_t, int32_t> -renumber_edgelist(raft::handle_t const& handle, - int32_t* edgelist_major_vertices /* [INOUT] */, - int32_t* edgelist_minor_vertices /* [INOUT] */, - int32_t num_edgelist_edges, - bool is_hypergraph_partitioned, - bool do_expensive_check); +renumber_edgelist( + raft::handle_t const& handle, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, + bool do_expensive_check); template rmm::device_uvector renumber_edgelist( raft::handle_t const& handle, @@ -726,14 +829,14 @@ template rmm::device_uvector renumber_edgelist bool do_expensive_check); template std::tuple, partition_t, int32_t, int32_t> -renumber_edgelist(raft::handle_t const& handle, - int32_t const* local_vertices, - int32_t num_local_vertices, - int32_t* edgelist_major_vertices /* [INOUT] */, - int32_t* edgelist_minor_vertices /* [INOUT] */, - int32_t num_edgelist_edges, - bool is_hypergraph_partitioned, - bool do_expensive_check); +renumber_edgelist( + raft::handle_t const& handle, + int32_t const* local_vertices, + int32_t num_local_vertices, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, + bool do_expensive_check); template rmm::device_uvector renumber_edgelist( raft::handle_t const& handle, @@ -747,12 +850,12 @@ template rmm::device_uvector renumber_edgelist // instantiations for // template std::tuple, partition_t, int32_t, int64_t> -renumber_edgelist(raft::handle_t const& handle, - int32_t* edgelist_major_vertices /* [INOUT] */, - int32_t* edgelist_minor_vertices /* [INOUT] */, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned, - bool do_expensive_check); +renumber_edgelist( + raft::handle_t const& handle, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, + bool do_expensive_check); template rmm::device_uvector renumber_edgelist( raft::handle_t const& handle, @@ -762,14 +865,14 @@ template rmm::device_uvector renumber_edgelist bool do_expensive_check); template std::tuple, partition_t, int32_t, int64_t> -renumber_edgelist(raft::handle_t const& handle, - int32_t const* local_vertices, - int32_t num_local_vertices, - int32_t* edgelist_major_vertices /* [INOUT] */, - int32_t* edgelist_minor_vertices /* [INOUT] */, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned, - bool do_expensive_check); +renumber_edgelist( + 
raft::handle_t const& handle, + int32_t const* local_vertices, + int32_t num_local_vertices, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, + bool do_expensive_check); template rmm::device_uvector renumber_edgelist( raft::handle_t const& handle, @@ -783,12 +886,12 @@ template rmm::device_uvector renumber_edgelist // instantiations for // template std::tuple, partition_t, int64_t, int64_t> -renumber_edgelist(raft::handle_t const& handle, - int64_t* edgelist_major_vertices /* [INOUT] */, - int64_t* edgelist_minor_vertices /* [INOUT] */, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned, - bool do_expensive_check); +renumber_edgelist( + raft::handle_t const& handle, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, + bool do_expensive_check); template rmm::device_uvector renumber_edgelist( raft::handle_t const& handle, @@ -798,14 +901,14 @@ template rmm::device_uvector renumber_edgelist bool do_expensive_check); template std::tuple, partition_t, int64_t, int64_t> -renumber_edgelist(raft::handle_t const& handle, - int64_t const* local_vertices, - int64_t num_local_vertices, - int64_t* edgelist_major_vertices /* [INOUT] */, - int64_t* edgelist_minor_vertices /* [INOUT] */, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned, - bool do_expensive_check); +renumber_edgelist( + raft::handle_t const& handle, + int64_t const* local_vertices, + int64_t num_local_vertices, + std::vector const& edgelist_major_vertices /* [INOUT] */, + std::vector const& edgelist_minor_vertices /* [INOUT] */, + std::vector const& edgelist_edge_counts, + bool do_expensive_check); template rmm::device_uvector renumber_edgelist( raft::handle_t const& handle, diff --git a/cpp/src/experimental/renumber_utils.cu b/cpp/src/experimental/renumber_utils.cu new file mode 100644 index 00000000000..8f59683d9d6 --- /dev/null +++ b/cpp/src/experimental/renumber_utils.cu @@ -0,0 +1,477 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { + +template +void renumber_ext_vertices(raft::handle_t const& handle, + vertex_t* vertices /* [INOUT] */, + size_t num_vertices, + vertex_t const* renumber_map_labels, + vertex_t local_int_vertex_first, + vertex_t local_int_vertex_last, + bool do_expensive_check) +{ + double constexpr load_factor = 0.7; + + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "renumber_vertices() not supported on Pascal and older architectures."); + +#ifdef CUCO_STATIC_MAP_DEFINED + if (do_expensive_check) { + rmm::device_uvector labels(local_int_vertex_last - local_int_vertex_first, + handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + renumber_map_labels, + renumber_map_labels + labels.size(), + labels.begin()); + thrust::sort( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), labels.begin(), labels.end()); + CUGRAPH_EXPECTS(thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels.begin(), + labels.end()) == labels.end(), + "Invalid input arguments: renumber_map_labels have duplicate elements."); + } + + auto renumber_map_ptr = std::make_unique>( + size_t{0}, invalid_vertex_id::value, invalid_vertex_id::value); + if (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + rmm::device_uvector sorted_unique_ext_vertices(num_vertices, handle.get_stream()); + sorted_unique_ext_vertices.resize( + thrust::distance( + sorted_unique_ext_vertices.begin(), + thrust::copy_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_vertices, + sorted_unique_ext_vertices.begin(), + [] __device__(auto v) { return v != invalid_vertex_id::value; })), + handle.get_stream()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_unique_ext_vertices.begin(), + sorted_unique_ext_vertices.end()); + sorted_unique_ext_vertices.resize( + thrust::distance( + sorted_unique_ext_vertices.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_unique_ext_vertices.begin(), + sorted_unique_ext_vertices.end())), + handle.get_stream()); + + auto int_vertices_for_sorted_unique_ext_vertices = collect_values_for_unique_keys( + comm, + renumber_map_labels, + renumber_map_labels + (local_int_vertex_last - local_int_vertex_first), + thrust::make_counting_iterator(local_int_vertex_first), + sorted_unique_ext_vertices.begin(), + sorted_unique_ext_vertices.end(), + detail::compute_gpu_id_from_vertex_t{comm_size}, + handle.get_stream()); + + handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream + + renumber_map_ptr.reset(); + + renumber_map_ptr = std::make_unique>( + // FIXME: std::max(..., ...) 
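renumber_ext_vertices() above first compacts the queried vertices into a sorted, duplicate-free list (dropping the invalid-vertex sentinel) before collecting their internal ids from the owning GPUs. A host-side analogue of that copy_if / sort / unique sequence, with a hypothetical sentinel value:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <limits>
#include <vector>

int main()
{
  auto const invalid_vertex = std::numeric_limits<int32_t>::max();  // hypothetical sentinel
  std::vector<int32_t> vertices{5, invalid_vertex, 2, 5, 9, 2, invalid_vertex};

  // copy_if: keep only valid vertices
  std::vector<int32_t> sorted_unique_ext_vertices{};
  std::copy_if(vertices.begin(),
               vertices.end(),
               std::back_inserter(sorted_unique_ext_vertices),
               [invalid_vertex](auto v) { return v != invalid_vertex; });
  // sort + unique: one lookup key per distinct external vertex
  std::sort(sorted_unique_ext_vertices.begin(), sorted_unique_ext_vertices.end());
  sorted_unique_ext_vertices.erase(
    std::unique(sorted_unique_ext_vertices.begin(), sorted_unique_ext_vertices.end()),
    sorted_unique_ext_vertices.end());

  for (auto v : sorted_unique_ext_vertices) { std::cout << v << " "; }  // prints "2 5 9"
  std::cout << "\n";
  return 0;
}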
as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max( + static_cast(static_cast(sorted_unique_ext_vertices.size()) / load_factor), + sorted_unique_ext_vertices.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto kv_pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple( + sorted_unique_ext_vertices.begin(), int_vertices_for_sorted_unique_ext_vertices.begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (sorted_unique_ext_vertices.size()) { + renumber_map_ptr->insert(kv_pair_first, kv_pair_first + sorted_unique_ext_vertices.size()); + } + } else { + handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream + + renumber_map_ptr.reset(); + + renumber_map_ptr = std::make_unique>( + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max(static_cast( + static_cast(local_int_vertex_last - local_int_vertex_first) / load_factor), + static_cast(local_int_vertex_last - local_int_vertex_first) + 1), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(renumber_map_labels, thrust::make_counting_iterator(vertex_t{0}))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if ((local_int_vertex_last - local_int_vertex_first) > 0) { + renumber_map_ptr->insert(pair_first, + pair_first + (local_int_vertex_last - local_int_vertex_first)); + } + } + + if (do_expensive_check) { + rmm::device_uvector contains(num_vertices, handle.get_stream()); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (num_vertices > 0) { + renumber_map_ptr->contains(vertices, vertices + num_vertices, contains.begin()); + } + auto vc_pair_first = thrust::make_zip_iterator(thrust::make_tuple(vertices, contains.begin())); + CUGRAPH_EXPECTS(thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vc_pair_first, + vc_pair_first + num_vertices, + [] __device__(auto pair) { + auto v = thrust::get<0>(pair); + auto c = thrust::get<1>(pair); + return v == invalid_vertex_id::value + ? (c == true) + : (c == false); + }) == 0, + "Invalid input arguments: vertices have elements that are missing in " + "(aggregate) renumber_map_labels."); + } + + // FIXME: a temporary workaround for https://github.com/NVIDIA/cuCollections/issues/74 +#if 1 + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_vertices, + vertices, + [view = renumber_map_ptr->get_device_view()] __device__(auto v) { + return v != invalid_vertex_id::value + ? view.find(v)->second.load(cuda::std::memory_order_relaxed) + : invalid_vertex_id::value; + }); +#else + // FIXME: a temporary workaround. 
cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (num_vertices > 0) { renumber_map_ptr->find(vertices, vertices + num_vertices, vertices); } +#endif +#endif +} + +template +void unrenumber_local_int_vertices( + raft::handle_t const& handle, + vertex_t* vertices /* [INOUT] */, + size_t num_vertices, + vertex_t const* renumber_map_labels /* size = local_int_vertex_last - local_int_vertex_first */, + vertex_t local_int_vertex_first, + vertex_t local_int_vertex_last, + bool do_expensive_check) +{ + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "unrenumber_local_vertices() not supported on Pascal and older architectures."); + +#ifdef CUCO_STATIC_MAP_DEFINED + if (do_expensive_check) { + CUGRAPH_EXPECTS( + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_vertices, + [local_int_vertex_first, local_int_vertex_last] __device__(auto v) { + return v != invalid_vertex_id::value && + (v < local_int_vertex_first || v >= local_int_vertex_last); + }) == 0, + "Invalid input arguments: there are non-local vertices in [vertices, vertices " + "+ num_vertices)."); + } + + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_vertices, + vertices, + [renumber_map_labels, local_int_vertex_first] __device__(auto v) { + return v == invalid_vertex_id::value + ? v + : renumber_map_labels[v - local_int_vertex_first]; + }); +#endif +} + +template +void unrenumber_int_vertices(raft::handle_t const& handle, + vertex_t* vertices /* [INOUT] */, + size_t num_vertices, + vertex_t const* renumber_map_labels, + vertex_t local_int_vertex_first, + vertex_t local_int_vertex_last, + std::vector& vertex_partition_lasts, + bool do_expensive_check) +{ + double constexpr load_factor = 0.7; + + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "unrenumber_vertices() not supported on Pascal and older architectures."); + +#ifdef CUCO_STATIC_MAP_DEFINED + if (do_expensive_check) { + CUGRAPH_EXPECTS( + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_vertices, + [int_vertex_last = vertex_partition_lasts.back()] __device__(auto v) { + return v != invalid_vertex_id::value && + !is_valid_vertex(int_vertex_last, v); + }) == 0, + "Invalid input arguments: there are out-of-range vertices in [vertices, vertices " + "+ num_vertices)."); + } + + if (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + rmm::device_uvector sorted_unique_int_vertices(num_vertices, handle.get_stream()); + sorted_unique_int_vertices.resize( + thrust::distance( + sorted_unique_int_vertices.begin(), + thrust::copy_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_vertices, + sorted_unique_int_vertices.begin(), + [] __device__(auto v) { return v != invalid_vertex_id::value; })), + handle.get_stream()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_unique_int_vertices.begin(), + sorted_unique_int_vertices.end()); + sorted_unique_int_vertices.resize( + thrust::distance( + sorted_unique_int_vertices.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_unique_int_vertices.begin(), + 
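unrenumber_local_int_vertices() above is just an indexed lookup into the local slice of the renumber map, leaving the invalid-vertex sentinel untouched. A host-only sketch of that transform, with made-up labels and a hypothetical sentinel:

#include <cstdint>
#include <iostream>
#include <limits>
#include <vector>

int main()
{
  auto const invalid_vertex = std::numeric_limits<int32_t>::max();  // hypothetical sentinel
  int32_t const local_int_vertex_first = 4;  // this rank owns internal ids [4, 8)
  // external labels of the locally owned internal vertices 4, 5, 6, 7
  std::vector<int32_t> renumber_map_labels{40, 55, 61, 73};

  std::vector<int32_t> vertices{5, 7, invalid_vertex, 4};
  for (auto& v : vertices) {
    v = (v == invalid_vertex) ? v : renumber_map_labels[v - local_int_vertex_first];
  }
  for (auto v : vertices) { std::cout << v << " "; }  // prints "55 73 2147483647 40"
  std::cout << "\n";
  return 0;
}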
sorted_unique_int_vertices.end())), + handle.get_stream()); + + rmm::device_uvector d_vertex_partition_lasts(vertex_partition_lasts.size(), + handle.get_stream()); + raft::update_device(d_vertex_partition_lasts.data(), + vertex_partition_lasts.data(), + vertex_partition_lasts.size(), + handle.get_stream()); + rmm::device_uvector d_tx_int_vertex_offsets(d_vertex_partition_lasts.size(), + handle.get_stream()); + thrust::lower_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_unique_int_vertices.begin(), + sorted_unique_int_vertices.end(), + d_vertex_partition_lasts.begin(), + d_vertex_partition_lasts.end(), + d_tx_int_vertex_offsets.begin()); + std::vector h_tx_int_vertex_counts(d_tx_int_vertex_offsets.size()); + raft::update_host(h_tx_int_vertex_counts.data(), + d_tx_int_vertex_offsets.data(), + d_tx_int_vertex_offsets.size(), + handle.get_stream()); + handle.get_stream_view().synchronize(); + std::adjacent_difference( + h_tx_int_vertex_counts.begin(), h_tx_int_vertex_counts.end(), h_tx_int_vertex_counts.begin()); + + rmm::device_uvector rx_int_vertices(0, handle.get_stream()); + std::vector rx_int_vertex_counts{}; + std::tie(rx_int_vertices, rx_int_vertex_counts) = shuffle_values( + comm, sorted_unique_int_vertices.begin(), h_tx_int_vertex_counts, handle.get_stream()); + + auto tx_ext_vertices = std::move(rx_int_vertices); + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + tx_ext_vertices.begin(), + tx_ext_vertices.end(), + tx_ext_vertices.begin(), + [renumber_map_labels, local_int_vertex_first] __device__(auto v) { + return renumber_map_labels[v - local_int_vertex_first]; + }); + + rmm::device_uvector rx_ext_vertices_for_sorted_unique_int_vertices( + 0, handle.get_stream()); + std::tie(rx_ext_vertices_for_sorted_unique_int_vertices, std::ignore) = + shuffle_values(comm, tx_ext_vertices.begin(), rx_int_vertex_counts, handle.get_stream()); + + handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream + + cuco::static_map unrenumber_map( + // FIXME: std::max(..., ...) as a temporary workaround for + // https://github.com/NVIDIA/cuCollections/issues/72 and + // https://github.com/NVIDIA/cuCollections/issues/73 + std::max( + static_cast(static_cast(sorted_unique_int_vertices.size()) / load_factor), + sorted_unique_int_vertices.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(sorted_unique_int_vertices.begin(), + rx_ext_vertices_for_sorted_unique_int_vertices.begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + // FIXME: a temporary workaround. cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (sorted_unique_int_vertices.size()) { + unrenumber_map.insert(pair_first, pair_first + sorted_unique_int_vertices.size()); + } + // FIXME: a temporary workaround for https://github.com/NVIDIA/cuCollections/issues/74 +#if 1 + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_vertices, + vertices, + [view = unrenumber_map.get_device_view()] __device__(auto v) { + return v != invalid_vertex_id::value + ? view.find(v)->second.load(cuda::std::memory_order_relaxed) + : invalid_vertex_id::value; + }); +#else + // FIXME: a temporary workaround. 
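To route each queried internal vertex to the GPU that owns it, the code above lower_bounds the sorted query list against vertex_partition_lasts and then takes adjacent differences to get per-rank send counts. The same arithmetic on the host, with hypothetical partition boundaries:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

int main()
{
  // hypothetical boundaries: rank 0 owns [0, 4), rank 1 owns [4, 9), rank 2 owns [9, 12)
  std::vector<int32_t> vertex_partition_lasts{4, 9, 12};
  // sorted, unique internal vertices this rank needs to translate
  std::vector<int32_t> sorted_unique_int_vertices{1, 3, 4, 8, 10};

  // cumulative offsets: how many queried vertices fall before each partition's last
  std::vector<size_t> tx_int_vertex_counts(vertex_partition_lasts.size());
  for (size_t i = 0; i < vertex_partition_lasts.size(); ++i) {
    tx_int_vertex_counts[i] = static_cast<size_t>(
      std::lower_bound(sorted_unique_int_vertices.begin(),
                       sorted_unique_int_vertices.end(),
                       vertex_partition_lasts[i]) -
      sorted_unique_int_vertices.begin());
  }
  // adjacent_difference turns the cumulative offsets into per-rank counts
  std::adjacent_difference(
    tx_int_vertex_counts.begin(), tx_int_vertex_counts.end(), tx_int_vertex_counts.begin());

  for (auto c : tx_int_vertex_counts) { std::cout << c << " "; }  // prints "2 2 1"
  std::cout << "\n";
  return 0;
}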
cuco::static_map currently launches a kernel even if the grid + // size is 0; this leads to cudaErrorInvaildConfiguration. + if (num_vertices > 0) { unrenumber_map.find(vertices, vertices + num_vertices, vertices); } +#endif + } else { + unrenumber_local_int_vertices(handle, + vertices, + num_vertices, + renumber_map_labels, + local_int_vertex_first, + local_int_vertex_last, + do_expensive_check); + } +#endif +} + +// explicit instantiation + +template void renumber_ext_vertices(raft::handle_t const& handle, + int32_t* vertices, + size_t num_vertices, + int32_t const* renumber_map_labels, + int32_t local_int_vertex_first, + int32_t local_int_vertex_last, + bool do_expensive_check); + +template void renumber_ext_vertices(raft::handle_t const& handle, + int32_t* vertices, + size_t num_vertices, + int32_t const* renumber_map_labels, + int32_t local_int_vertex_first, + int32_t local_int_vertex_last, + bool do_expensive_check); + +template void renumber_ext_vertices(raft::handle_t const& handle, + int64_t* vertices, + size_t num_vertices, + int64_t const* renumber_map_labels, + int64_t local_int_vertex_first, + int64_t local_int_vertex_last, + bool do_expensive_check); + +template void renumber_ext_vertices(raft::handle_t const& handle, + int64_t* vertices, + size_t num_vertices, + int64_t const* renumber_map_labels, + int64_t local_int_vertex_first, + int64_t local_int_vertex_last, + bool do_expensive_check); + +template void unrenumber_local_int_vertices(raft::handle_t const& handle, + int32_t* vertices, + size_t num_vertices, + int32_t const* renumber_map_labels, + int32_t local_int_vertex_first, + int32_t local_int_vertex_last, + bool do_expensive_check); + +template void unrenumber_local_int_vertices(raft::handle_t const& handle, + int64_t* vertices, + size_t num_vertices, + int64_t const* renumber_map_labels, + int64_t local_int_vertex_first, + int64_t local_int_vertex_last, + bool do_expensive_check); + +template void unrenumber_int_vertices(raft::handle_t const& handle, + int32_t* vertices, + size_t num_vertices, + int32_t const* renumber_map_labels, + int32_t local_int_vertex_first, + int32_t local_int_vertex_last, + std::vector& vertex_partition_lasts, + bool do_expensive_check); + +template void unrenumber_int_vertices(raft::handle_t const& handle, + int32_t* vertices, + size_t num_vertices, + int32_t const* renumber_map_labels, + int32_t local_int_vertex_first, + int32_t local_int_vertex_last, + std::vector& vertex_partition_lasts, + bool do_expensive_check); + +template void unrenumber_int_vertices(raft::handle_t const& handle, + int64_t* vertices, + size_t num_vertices, + int64_t const* renumber_map_labels, + int64_t local_int_vertex_first, + int64_t local_int_vertex_last, + std::vector& vertex_partition_lasts, + bool do_expensive_check); + +template void unrenumber_int_vertices(raft::handle_t const& handle, + int64_t* vertices, + size_t num_vertices, + int64_t const* renumber_map_labels, + int64_t local_int_vertex_first, + int64_t local_int_vertex_last, + std::vector& vertex_partition_lasts, + bool do_expensive_check); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/sssp.cu b/cpp/src/experimental/sssp.cu index 4996b3734cb..373444cb0a2 100644 --- a/cpp/src/experimental/sssp.cu +++ b/cpp/src/experimental/sssp.cu @@ -70,6 +70,9 @@ void sssp(raft::handle_t const &handle, CUGRAPH_EXPECTS(push_graph_view.is_valid_vertex(source_vertex), "Invalid input argument: source vertex out-of-range."); + CUGRAPH_EXPECTS(push_graph_view.is_weighted(), + 
"Invalid input argument: an unweighted graph is passed to SSSP, BFS is more " + "efficient for unweighted graphs."); if (do_expensive_check) { auto num_negative_edge_weights = @@ -126,10 +129,7 @@ void sssp(raft::handle_t const &handle, // FIXME: need to double check the bucket sizes are sufficient std::vector bucket_sizes(static_cast(Bucket::num_buckets), push_graph_view.get_number_of_local_vertices()); - VertexFrontier, - vertex_t, - GraphViewType::is_multi_gpu, - static_cast(Bucket::num_buckets)> + VertexFrontier(Bucket::num_buckets)> vertex_frontier(handle, bucket_sizes); // 5. SSSP iteration @@ -188,7 +188,7 @@ void sssp(raft::handle_t const &handle, threshold = old_distance < threshold ? old_distance : threshold; } if (new_distance >= threshold) { push = false; } - return thrust::make_tuple(push, new_distance, src); + return thrust::make_tuple(push, thrust::make_tuple(new_distance, src)); }, reduce_op::min>(), distances, @@ -199,8 +199,8 @@ void sssp(raft::handle_t const &handle, auto idx = new_dist < v_val ? (new_dist < near_far_threshold ? static_cast(Bucket::new_near) : static_cast(Bucket::far)) - : VertexFrontier, vertex_t>::kInvalidBucketIdx; - return thrust::make_tuple(idx, thrust::get<0>(pushed_val), thrust::get<1>(pushed_val)); + : VertexFrontier::kInvalidBucketIdx; + return thrust::make_tuple(idx, pushed_val); }); vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).clear(); @@ -222,7 +222,7 @@ void sssp(raft::handle_t const &handle, auto dist = *(distances + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v)); if (dist < old_near_far_threshold) { - return VertexFrontier, vertex_t>::kInvalidBucketIdx; + return VertexFrontier::kInvalidBucketIdx; } else if (dist < near_far_threshold) { return static_cast(Bucket::cur_near); } else { diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index a9e3146bbcd..4a2b98ea815 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -20,22 +20,101 @@ #include #include #include -#include #include #include #include #include +#include +#include + #include +#include +#include #include +#include #include +#include +#include + +#include +#include namespace cugraph { namespace cython { namespace detail { -// FIXME: Add description of this function +// workaround for CUDA extended lambda restrictions +template +struct compute_local_partition_id_t { + vertex_t const* lasts{nullptr}; + size_t num_local_partitions{0}; + + __device__ size_t operator()(vertex_t v) + { + for (size_t i = 0; i < num_local_partitions; ++i) { + if (v < lasts[i]) { return i; } + } + return num_local_partitions; + } +}; + +// FIXME: this is unnecessary if edge_counts_ in the major_minor_weights_t object returned by +// call_shuffle() is passed back, better be fixed. this code assumes that the entire set of edges +// for each partition are consecutively stored. 
+template +std::vector compute_edge_counts(raft::handle_t const& handle, + graph_container_t const& graph_container) +{ + auto num_local_partitions = static_cast(graph_container.col_comm_size); + + std::vector partition_offsets_vector( + reinterpret_cast(graph_container.vertex_partition_offsets), + reinterpret_cast(graph_container.vertex_partition_offsets) + + (graph_container.row_comm_size * graph_container.col_comm_size) + 1); + + std::vector h_lasts(num_local_partitions); + for (size_t i = 0; i < h_lasts.size(); ++i) { + h_lasts[i] = partition_offsets_vector[graph_container.row_comm_size * (i + 1)]; + } + rmm::device_uvector d_lasts(h_lasts.size(), handle.get_stream()); + raft::update_device(d_lasts.data(), h_lasts.data(), h_lasts.size(), handle.get_stream()); + auto major_vertices = transposed + ? reinterpret_cast(graph_container.dst_vertices) + : reinterpret_cast(graph_container.src_vertices); + auto key_first = thrust::make_transform_iterator( + major_vertices, compute_local_partition_id_t{d_lasts.data(), num_local_partitions}); + rmm::device_uvector d_local_partition_ids(num_local_partitions, handle.get_stream()); + rmm::device_uvector d_edge_counts(d_local_partition_ids.size(), handle.get_stream()); + auto it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + key_first, + key_first + graph_container.num_local_edges, + thrust::make_constant_iterator(edge_t{1}), + d_local_partition_ids.begin(), + d_edge_counts.begin()); + if (static_cast(thrust::distance(d_local_partition_ids.begin(), thrust::get<0>(it))) < + num_local_partitions) { + rmm::device_uvector d_counts(num_local_partitions, handle.get_stream()); + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_counts.begin(), + d_counts.end(), + edge_t{0}); + thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_edge_counts.begin(), + thrust::get<1>(it), + d_local_partition_ids.begin(), + d_counts.begin()); + d_edge_counts = std::move(d_counts); + } + std::vector h_edge_counts(num_local_partitions, 0); + raft::update_host( + h_edge_counts.data(), d_edge_counts.data(), d_edge_counts.size(), handle.get_stream()); + handle.get_stream_view().synchronize(); + + return h_edge_counts; +} + template > create_graph(raft::handle_t const& handle, graph_container_t const& graph_container) { - std::vector> edgelist( - {{reinterpret_cast(graph_container.src_vertices), - reinterpret_cast(graph_container.dst_vertices), - reinterpret_cast(graph_container.weights), - static_cast(graph_container.num_partition_edges)}}); + auto num_local_partitions = static_cast(graph_container.col_comm_size); std::vector partition_offsets_vector( reinterpret_cast(graph_container.vertex_partition_offsets), reinterpret_cast(graph_container.vertex_partition_offsets) + (graph_container.row_comm_size * graph_container.col_comm_size) + 1); + auto edge_counts = compute_edge_counts(handle, graph_container); + + std::vector displacements(edge_counts.size(), 0); + std::partial_sum(edge_counts.begin(), edge_counts.end() - 1, displacements.begin() + 1); + + std::vector> edgelists( + num_local_partitions); + for (size_t i = 0; i < edgelists.size(); ++i) { + edgelists[i] = cugraph::experimental::edgelist_t{ + reinterpret_cast(graph_container.src_vertices) + displacements[i], + reinterpret_cast(graph_container.dst_vertices) + displacements[i], + graph_container.graph_props.is_weighted + ? 
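Both the per-partition edge counts computed above and their exclusive prefix sums drive the same bookkeeping: the prefix sums give the offset of each partition's slice in the flat, shuffled edge arrays, which is how the surrounding create_graph() loop builds one edgelist per local partition. A host-side sketch of that slicing with made-up counts (slice_t is a hypothetical stand-in for the edgelist view being built here):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

struct slice_t {  // hypothetical stand-in for edgelist_t: pointers + a count
  int32_t const* src;
  int32_t const* dst;
  int64_t number_of_edges;
};

int main()
{
  // flat, shuffled edge arrays (both local partitions stored back to back)
  std::vector<int32_t> src{0, 1, 2, 3, 4, 5, 6};
  std::vector<int32_t> dst{1, 2, 3, 4, 5, 6, 0};
  std::vector<int64_t> edge_counts{4, 3};  // hypothetical per-partition counts

  std::vector<int64_t> displacements(edge_counts.size(), 0);
  std::partial_sum(edge_counts.begin(), edge_counts.end() - 1, displacements.begin() + 1);

  std::vector<slice_t> edgelists(edge_counts.size());
  for (size_t i = 0; i < edgelists.size(); ++i) {
    edgelists[i] =
      slice_t{src.data() + displacements[i], dst.data() + displacements[i], edge_counts[i]};
  }
  for (auto const& e : edgelists) {
    std::cout << e.number_of_edges << " edges starting at src " << *e.src << "\n";
  }
  // prints "4 edges starting at src 0" and "3 edges starting at src 4"
  return 0;
}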
reinterpret_cast(graph_container.weights) + displacements[i] + : static_cast(nullptr), + edge_counts[i]}; + } + experimental::partition_t partition(partition_offsets_vector, - graph_container.hypergraph_partitioned, graph_container.row_comm_size, graph_container.col_comm_size, graph_container.row_comm_rank, @@ -65,14 +156,12 @@ create_graph(raft::handle_t const& handle, graph_container_t const& graph_contai return std::make_unique>( handle, - edgelist, + edgelists, partition, static_cast(graph_container.num_global_vertices), static_cast(graph_container.num_global_edges), graph_container.graph_props, - // FIXME: This currently fails if sorted_by_degree is true... - // graph_container.sorted_by_degree, - false, + true, graph_container.do_expensive_check); } @@ -89,7 +178,7 @@ create_graph(raft::handle_t const& handle, graph_container_t const& graph_contai reinterpret_cast(graph_container.src_vertices), reinterpret_cast(graph_container.dst_vertices), reinterpret_cast(graph_container.weights), - static_cast(graph_container.num_partition_edges)}; + static_cast(graph_container.num_local_edges)}; return std::make_unique>( handle, edgelist, @@ -113,10 +202,11 @@ void populate_graph_container(graph_container_t& graph_container, numberTypeEnum vertexType, numberTypeEnum edgeType, numberTypeEnum weightType, - size_t num_partition_edges, + size_t num_local_edges, size_t num_global_vertices, size_t num_global_edges, bool sorted_by_degree, + bool is_weighted, bool transposed, bool multi_gpu) { @@ -124,7 +214,6 @@ void populate_graph_container(graph_container_t& graph_container, "populate_graph_container() can only be called on an empty container."); bool do_expensive_check{true}; - bool hypergraph_partitioned{false}; if (multi_gpu) { auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); @@ -143,7 +232,7 @@ void populate_graph_container(graph_container_t& graph_container, graph_container.src_vertices = src_vertices; graph_container.dst_vertices = dst_vertices; graph_container.weights = weights; - graph_container.num_partition_edges = num_partition_edges; + graph_container.num_local_edges = num_local_edges; graph_container.num_global_vertices = num_global_vertices; graph_container.num_global_edges = num_global_edges; graph_container.vertexType = vertexType; @@ -151,11 +240,11 @@ void populate_graph_container(graph_container_t& graph_container, graph_container.weightType = weightType; graph_container.transposed = transposed; graph_container.is_multi_gpu = multi_gpu; - graph_container.hypergraph_partitioned = hypergraph_partitioned; graph_container.sorted_by_degree = sorted_by_degree; graph_container.do_expensive_check = do_expensive_check; - experimental::graph_properties_t graph_props{.is_symmetric = false, .is_multigraph = false}; + experimental::graph_properties_t graph_props{ + .is_symmetric = false, .is_multigraph = false, .is_weighted = is_weighted}; graph_container.graph_props = graph_props; graph_container.graph_type = graphTypeEnum::graph_t; @@ -177,7 +266,7 @@ void populate_graph_container_legacy(graph_container_t& graph_container, int* local_offsets) { CUGRAPH_EXPECTS(graph_container.graph_type == graphTypeEnum::null, - "populate_graph_container() can only be called on an empty container."); + "populate_graph_container_legacy() can only be called on an empty container."); // FIXME: This is soon-to-be legacy code left in place until the new graph_t // class is supported everywhere else. 
Remove everything down to the comment @@ -802,23 +891,23 @@ void call_sssp(raft::handle_t const& handle, // wrapper for shuffling: // template -std::unique_ptr> call_shuffle( +std::unique_ptr> call_shuffle( raft::handle_t const& handle, vertex_t* edgelist_major_vertices, // [IN / OUT]: groupby_gpuid_and_shuffle_values() sorts in-place vertex_t* edgelist_minor_vertices, // [IN / OUT] weight_t* edgelist_weights, // [IN / OUT] - edge_t num_edgelist_edges, - bool is_hypergraph_partitioned) // = false + edge_t num_edgelist_edges) { - auto& comm = handle.get_comms(); + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - - std::unique_ptr> ptr_ret = - std::make_unique>(handle); + std::unique_ptr> ptr_ret = + std::make_unique>(handle); if (edgelist_weights != nullptr) { auto zip_edge = thrust::make_zip_iterator( @@ -833,10 +922,7 @@ std::unique_ptr> call_shuffle( zip_edge + num_edgelist_edges, [key_func = cugraph::experimental::detail::compute_gpu_id_from_edge_t{ - is_hypergraph_partitioned, - comm.get_size(), - row_comm.get_size(), - col_comm.get_size()}] __device__(auto val) { + comm.get_size(), row_comm.get_size(), col_comm.get_size()}] __device__(auto val) { return key_func(thrust::get<0>(val), thrust::get<1>(val)); }, handle.get_stream()); @@ -852,15 +938,46 @@ std::unique_ptr> call_shuffle( zip_edge + num_edgelist_edges, [key_func = cugraph::experimental::detail::compute_gpu_id_from_edge_t{ - is_hypergraph_partitioned, - comm.get_size(), - row_comm.get_size(), - col_comm.get_size()}] __device__(auto val) { + comm.get_size(), row_comm.get_size(), col_comm.get_size()}] __device__(auto val) { return key_func(thrust::get<0>(val), thrust::get<1>(val)); }, handle.get_stream()); } + auto local_partition_id_op = + [comm_size, + key_func = cugraph::experimental::detail::compute_partition_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto pair) { + return key_func(thrust::get<0>(pair), thrust::get<1>(pair)) / + comm_size; // global partition id to local partition id + }; + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(ptr_ret->get_major().data(), ptr_ret->get_minor().data())); + + auto edge_counts = + (edgelist_weights != nullptr) + ? 
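The local_partition_id_op defined below relies on the convention that the global partition ids owned by one GPU are spaced comm_size apart, so integer division by comm_size recovers the local partition index. A tiny host check of that mapping, reusing the hypothetical 2 x 2 grid from the earlier sketch:

#include <iostream>

int main()
{
  int const comm_size = 4;  // hypothetical 2 x 2 GPU grid
  // global partition ids 2 and 6 (owned by the GPU at row 1, col 0 in the earlier sketch)
  // map back to local partition ids 0 and 1
  std::cout << 2 / comm_size << " " << 6 / comm_size << "\n";  // prints "0 1"
  return 0;
}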
cugraph::experimental::groupby_and_count(pair_first, + pair_first + ptr_ret->get_major().size(), + ptr_ret->get_weights().data(), + local_partition_id_op, + col_comm_size, + handle.get_stream()) + : cugraph::experimental::groupby_and_count(pair_first, + pair_first + ptr_ret->get_major().size(), + local_partition_id_op, + col_comm_size, + handle.get_stream()); + + std::vector h_edge_counts(edge_counts.size()); + raft::update_host( + h_edge_counts.data(), edge_counts.data(), edge_counts.size(), handle.get_stream()); + handle.get_stream_view().synchronize(); + + ptr_ret->get_edge_counts().resize(h_edge_counts.size()); + for (size_t i = 0; i < h_edge_counts.size(); ++i) { + ptr_ret->get_edge_counts()[i] = static_cast(h_edge_counts[i]); + } + return ptr_ret; // RVO-ed } @@ -872,8 +989,7 @@ std::unique_ptr> call_renumber( raft::handle_t const& handle, vertex_t* shuffled_edgelist_major_vertices /* [INOUT] */, vertex_t* shuffled_edgelist_minor_vertices /* [INOUT] */, - edge_t num_edgelist_edges, - bool is_hypergraph_partitioned, + std::vector const& edge_counts, bool do_expensive_check, bool multi_gpu) // bc. cython cannot take non-type template params { @@ -883,33 +999,31 @@ std::unique_ptr> call_renumber( std::make_unique>(handle); if (multi_gpu) { + std::vector displacements(edge_counts.size(), edge_t{0}); + std::partial_sum(edge_counts.begin(), edge_counts.end() - 1, displacements.begin() + 1); + std::vector major_ptrs(edge_counts.size()); + std::vector minor_ptrs(major_ptrs.size()); + for (size_t i = 0; i < edge_counts.size(); ++i) { + major_ptrs[i] = shuffled_edgelist_major_vertices + displacements[i]; + minor_ptrs[i] = shuffled_edgelist_minor_vertices + displacements[i]; + } + std::tie( p_ret->get_dv(), p_ret->get_partition(), p_ret->get_num_vertices(), p_ret->get_num_edges()) = cugraph::experimental::renumber_edgelist( - handle, - shuffled_edgelist_major_vertices, - shuffled_edgelist_minor_vertices, - num_edgelist_edges, - is_hypergraph_partitioned, - do_expensive_check); + handle, major_ptrs, minor_ptrs, edge_counts, do_expensive_check); } else { - auto ret_f = cugraph::experimental::renumber_edgelist( + p_ret->get_dv() = cugraph::experimental::renumber_edgelist( handle, shuffled_edgelist_major_vertices, shuffled_edgelist_minor_vertices, - num_edgelist_edges, + edge_counts[0], do_expensive_check); - auto tot_vertices = static_cast(ret_f.size()); - - p_ret->get_dv() = std::move(ret_f); - cugraph::experimental::partition_t part_sg( - std::vector{0, tot_vertices}, false, 1, 1, 0, 0); - - p_ret->get_partition() = std::move(part_sg); + p_ret->get_partition() = cugraph::experimental::partition_t{}; // dummy - p_ret->get_num_vertices() = tot_vertices; - p_ret->get_num_edges() = num_edgelist_edges; + p_ret->get_num_vertices() = static_cast(p_ret->get_dv().size()); + p_ret->get_num_edges() = edge_counts[0]; } return p_ret; // RVO-ed (copy ellision) @@ -1142,53 +1256,47 @@ template void call_sssp(raft::handle_t const& handle, int64_t* predecessors, const int64_t source_vertex); -template std::unique_ptr> call_shuffle( +template std::unique_ptr> call_shuffle( raft::handle_t const& handle, int32_t* edgelist_major_vertices, int32_t* edgelist_minor_vertices, float* edgelist_weights, - int32_t num_edgelist_edges, - bool is_hypergraph_partitioned); + int32_t num_edgelist_edges); -template std::unique_ptr> call_shuffle( +template std::unique_ptr> call_shuffle( raft::handle_t const& handle, int32_t* edgelist_major_vertices, int32_t* edgelist_minor_vertices, float* edgelist_weights, - int64_t 
num_edgelist_edges, - bool is_hypergraph_partitioned); + int64_t num_edgelist_edges); -template std::unique_ptr> call_shuffle( +template std::unique_ptr> call_shuffle( raft::handle_t const& handle, int32_t* edgelist_major_vertices, int32_t* edgelist_minor_vertices, double* edgelist_weights, - int32_t num_edgelist_edges, - bool is_hypergraph_partitioned); + int32_t num_edgelist_edges); -template std::unique_ptr> call_shuffle( +template std::unique_ptr> call_shuffle( raft::handle_t const& handle, int32_t* edgelist_major_vertices, int32_t* edgelist_minor_vertices, double* edgelist_weights, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned); + int64_t num_edgelist_edges); -template std::unique_ptr> call_shuffle( +template std::unique_ptr> call_shuffle( raft::handle_t const& handle, int64_t* edgelist_major_vertices, int64_t* edgelist_minor_vertices, float* edgelist_weights, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned); + int64_t num_edgelist_edges); -template std::unique_ptr> call_shuffle( +template std::unique_ptr> call_shuffle( raft::handle_t const& handle, int64_t* edgelist_major_vertices, int64_t* edgelist_minor_vertices, double* edgelist_weights, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned); + int64_t num_edgelist_edges); // TODO: add the remaining relevant EIDIr's: // @@ -1196,8 +1304,7 @@ template std::unique_ptr> call_renumber( raft::handle_t const& handle, int32_t* shuffled_edgelist_major_vertices /* [INOUT] */, int32_t* shuffled_edgelist_minor_vertices /* [INOUT] */, - int32_t num_edgelist_edges, - bool is_hypergraph_partitioned, + std::vector const& edge_counts, bool do_expensive_check, bool multi_gpu); @@ -1205,8 +1312,7 @@ template std::unique_ptr> call_renumber( raft::handle_t const& handle, int32_t* shuffled_edgelist_major_vertices /* [INOUT] */, int32_t* shuffled_edgelist_minor_vertices /* [INOUT] */, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned, + std::vector const& edge_counts, bool do_expensive_check, bool multi_gpu); @@ -1214,8 +1320,7 @@ template std::unique_ptr> call_renumber( raft::handle_t const& handle, int64_t* shuffled_edgelist_major_vertices /* [INOUT] */, int64_t* shuffled_edgelist_minor_vertices /* [INOUT] */, - int64_t num_edgelist_edges, - bool is_hypergraph_partitioned, + std::vector const& edge_counts, bool do_expensive_check, bool multi_gpu); diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 3b65b0edb29..89975f673ae 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -20,9 +20,10 @@ # - common test utils ----------------------------------------------------------------------------- add_library(cugraphtestutil STATIC - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/generate_graph_from_edgelist.cu" "${CMAKE_CURRENT_SOURCE_DIR}/utilities/matrix_market_file_utilities.cu" "${CMAKE_CURRENT_SOURCE_DIR}/utilities/rmat_utilities.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/generate_graph_from_edgelist.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/thrust_wrapper.cu" "${CMAKE_CURRENT_SOURCE_DIR}/utilities/misc_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c") @@ -445,7 +446,34 @@ if(BUILD_CUGRAPH_MG_TESTS) target_link_libraries(MG_PAGERANK_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) ########################################################################################### - # - MG LOUVAIN tests --------------------------------------------------------------------- + # - MG KATZ CENTRALITY tests 
-------------------------------------------------------------- + + set(MG_KATZ_CENTRALITY_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/mg_katz_centrality_test.cpp") + + ConfigureTest(MG_KATZ_CENTRALITY_TEST "${MG_KATZ_CENTRALITY_TEST_SRCS}") + target_link_libraries(MG_KATZ_CENTRALITY_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + + ########################################################################################### + # - MG BFS tests -------------------------------------------------------------------------- + + set(MG_BFS_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/mg_bfs_test.cpp") + + ConfigureTest(MG_BFS_TEST "${MG_BFS_TEST_SRCS}") + target_link_libraries(MG_BFS_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + + ########################################################################################### + # - MG SSSP tests ------------------------------------------------------------------------- + + set(MG_SSSP_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/mg_sssp_test.cpp") + + ConfigureTest(MG_SSSP_TEST "${MG_SSSP_TEST_SRCS}") + target_link_libraries(MG_SSSP_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + + ########################################################################################### + # - MG LOUVAIN tests ---------------------------------------------------------------------- set(MG_LOUVAIN_TEST_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/community/mg_louvain_helper.cu" @@ -453,7 +481,6 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureTest(MG_LOUVAIN_TEST "${MG_LOUVAIN_TEST_SRCS}") target_link_libraries(MG_LOUVAIN_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) - target_link_libraries(MG_LOUVAIN_TEST PRIVATE cugraph) else(MPI_CXX_FOUND) message(FATAL_ERROR "OpenMPI NOT found, cannot build MG tests.") diff --git a/cpp/tests/community/egonet_test.cu b/cpp/tests/community/egonet_test.cu index e7fea43be42..d61080c685e 100644 --- a/cpp/tests/community/egonet_test.cu +++ b/cpp/tests/community/egonet_test.cu @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -129,8 +128,10 @@ class Tests_InducedEgo : public ::testing::TestWithParam { ASSERT_TRUE(h_cugraph_ego_edge_offsets[i] <= h_cugraph_ego_edge_offsets[i + 1]); auto n_vertices = graph_view.get_number_of_vertices(); for (size_t i = 0; i < d_ego_edgelist_src.size(); i++) { - ASSERT_TRUE(cugraph::test::is_valid_vertex(n_vertices, h_cugraph_ego_edgelist_src[i])); - ASSERT_TRUE(cugraph::test::is_valid_vertex(n_vertices, h_cugraph_ego_edgelist_dst[i])); + ASSERT_TRUE( + cugraph::experimental::is_valid_vertex(n_vertices, h_cugraph_ego_edgelist_src[i])); + ASSERT_TRUE( + cugraph::experimental::is_valid_vertex(n_vertices, h_cugraph_ego_edgelist_dst[i])); } /* diff --git a/cpp/tests/community/mg_louvain_helper.cu b/cpp/tests/community/mg_louvain_helper.cu index a7f95e6d718..661065ca65b 100644 --- a/cpp/tests/community/mg_louvain_helper.cu +++ b/cpp/tests/community/mg_louvain_helper.cu @@ -323,7 +323,8 @@ coarsen_graph( handle, edgelist, new_number_of_vertices, - cugraph::experimental::graph_properties_t{graph_view.is_symmetric(), false}, + cugraph::experimental::graph_properties_t{ + graph_view.is_symmetric(), false, graph_view.is_weighted()}, true); } diff --git a/cpp/tests/community/mg_louvain_test.cpp b/cpp/tests/community/mg_louvain_test.cpp index f6596a6b59a..8a1a3010a6f 100644 --- a/cpp/tests/community/mg_louvain_test.cpp +++ b/cpp/tests/community/mg_louvain_test.cpp @@ -31,10 +31,13 @@ #include -void compare(float modularity, float sg_modularity) { ASSERT_FLOAT_EQ(modularity, sg_modularity); } -void compare(double 
modularity, double sg_modularity) +void compare(float mg_modularity, float sg_modularity) { - ASSERT_DOUBLE_EQ(modularity, sg_modularity); + ASSERT_FLOAT_EQ(mg_modularity, sg_modularity); +} +void compare(double mg_modularity, double sg_modularity) +{ + ASSERT_DOUBLE_EQ(mg_modularity, sg_modularity); } //////////////////////////////////////////////////////////////////////////////// @@ -90,13 +93,13 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam cugraph::Dendrogram const& dendrogram, weight_t resolution, int rank, - weight_t modularity) + weight_t mg_modularity) { auto sg_graph = std::make_unique>( handle); rmm::device_uvector d_clustering_v(0, handle.get_stream()); - weight_t sg_modularity; + weight_t sg_modularity{-1.0}; if (rank == 0) { // Create initial SG graph, renumbered according to the MNMG renumber map @@ -160,7 +163,7 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam } }); - if (rank == 0) compare(modularity, sg_modularity); + if (rank == 0) compare(mg_modularity, sg_modularity); } // Compare the results of running louvain on multiple GPUs to that of a @@ -197,9 +200,9 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam auto mg_graph_view = mg_graph.view(); std::unique_ptr> dendrogram; - weight_t modularity; + weight_t mg_modularity; - std::tie(dendrogram, modularity) = + std::tie(dendrogram, mg_modularity) = cugraph::louvain(handle, mg_graph_view, param.max_level, param.resolution); SCOPED_TRACE("compare modularity input: " + param.graph_file_full_path); @@ -213,7 +216,7 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam *dendrogram, param.resolution, comm_rank, - modularity); + mg_modularity); } }; diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp index ad9ece99ef9..8fce9488d8a 100644 --- a/cpp/tests/experimental/bfs_test.cpp +++ b/cpp/tests/experimental/bfs_test.cpp @@ -16,9 +16,11 @@ #include #include +#include #include #include +#include #include #include @@ -28,10 +30,16 @@ #include +#include #include #include #include +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + template void bfs_reference(edge_t const* offsets, vertex_t const* indices, @@ -74,9 +82,12 @@ void bfs_reference(edge_t const* offsets, typedef struct BFS_Usecase_t { cugraph::test::input_graph_specifier_t input_graph_specifier{}; - size_t source{false}; - BFS_Usecase_t(std::string const& graph_file_path, size_t source) : source(source) + size_t source{0}; + bool check_correctness{false}; + + BFS_Usecase_t(std::string const& graph_file_path, size_t source, bool check_correctness = true) + : source(source), check_correctness(check_correctness) { std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { @@ -88,13 +99,43 @@ typedef struct BFS_Usecase_t { input_graph_specifier.graph_file_full_path = graph_file_full_path; }; - BFS_Usecase_t(cugraph::test::rmat_params_t rmat_params, size_t source) : source(source) + BFS_Usecase_t(cugraph::test::rmat_params_t rmat_params, + size_t source, + bool check_correctness = true) + : source(source), check_correctness(check_correctness) { input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; input_graph_specifier.rmat_params = rmat_params; } } BFS_Usecase; +template +std::tuple, + rmm::device_uvector> +read_graph(raft::handle_t const& handle, BFS_Usecase const& configuration, bool renumber) +{ + return configuration.input_graph_specifier.tag == + 
cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.input_graph_specifier.graph_file_full_path, false, renumber) + : cugraph::test:: + generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + false, + renumber, + std::vector{0}, + size_t{1}); +} + class Tests_BFS : public ::testing::TestWithParam { public: Tests_BFS() {} @@ -107,58 +148,21 @@ class Tests_BFS : public ::testing::TestWithParam { template void run_current_test(BFS_Usecase const& configuration) { + constexpr bool renumber = true; + using weight_t = float; raft::handle_t handle{}; cugraph::experimental::graph_t graph(handle); - std::tie(graph, std::ignore) = - configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? cugraph::test:: - read_graph_from_matrix_market_file( - handle, configuration.input_graph_specifier.graph_file_full_path, false, false) - : cugraph::test::generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - false, - false); + rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); + std::tie(graph, d_renumber_map_labels) = + read_graph(handle, configuration, renumber); auto graph_view = graph.view(); - std::vector h_offsets(graph_view.get_number_of_vertices() + 1); - std::vector h_indices(graph_view.get_number_of_edges()); - raft::update_host(h_offsets.data(), - graph_view.offsets(), - graph_view.get_number_of_vertices() + 1, - handle.get_stream()); - raft::update_host(h_indices.data(), - graph_view.indices(), - graph_view.get_number_of_edges(), - handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - ASSERT_TRUE(configuration.source >= 0 && - configuration.source <= graph_view.get_number_of_vertices()) - << "Starting sources should be >= 0 and" - << " less than the number of vertices in the graph."; - - std::vector h_reference_distances(graph_view.get_number_of_vertices()); - std::vector h_reference_predecessors(graph_view.get_number_of_vertices()); - - bfs_reference(h_offsets.data(), - h_indices.data(), - h_reference_distances.data(), - h_reference_predecessors.data(), - graph_view.get_number_of_vertices(), - static_cast(configuration.source), - std::numeric_limits::max()); + ASSERT_TRUE(static_cast(configuration.source) >= 0 && + static_cast(configuration.source) < graph_view.get_number_of_vertices()) + << "Invalid starting source."; rmm::device_uvector d_distances(graph_view.get_number_of_vertices(), handle.get_stream()); @@ -169,46 +173,120 @@ class Tests_BFS : public ::testing::TestWithParam { 
cugraph::experimental::bfs(handle, graph_view, - d_distances.begin(), - d_predecessors.begin(), + d_distances.data(), + d_predecessors.data(), static_cast(configuration.source), false, - std::numeric_limits::max(), - false); + std::numeric_limits::max()); CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - std::vector h_cugraph_distances(graph_view.get_number_of_vertices()); - std::vector h_cugraph_predecessors(graph_view.get_number_of_vertices()); - - raft::update_host( - h_cugraph_distances.data(), d_distances.data(), d_distances.size(), handle.get_stream()); - raft::update_host(h_cugraph_predecessors.data(), - d_predecessors.data(), - d_predecessors.size(), - handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - ASSERT_TRUE(std::equal( - h_reference_distances.begin(), h_reference_distances.end(), h_cugraph_distances.begin())) - << "distances do not match with the reference values."; - - for (auto it = h_cugraph_predecessors.begin(); it != h_cugraph_predecessors.end(); ++it) { - auto i = std::distance(h_cugraph_predecessors.begin(), it); - if (*it == cugraph::invalid_vertex_id::value) { - ASSERT_TRUE(h_reference_predecessors[i] == *it) - << "vertex reachability do not match with the reference."; + if (configuration.check_correctness) { + cugraph::experimental::graph_t unrenumbered_graph( + handle); + if (renumber) { + std::tie(unrenumbered_graph, std::ignore) = + read_graph(handle, configuration, false); + } + auto unrenumbered_graph_view = renumber ? unrenumbered_graph.view() : graph_view; + + std::vector h_offsets(unrenumbered_graph_view.get_number_of_vertices() + 1); + std::vector h_indices(unrenumbered_graph_view.get_number_of_edges()); + raft::update_host(h_offsets.data(), + unrenumbered_graph_view.offsets(), + unrenumbered_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + unrenumbered_graph_view.indices(), + unrenumbered_graph_view.get_number_of_edges(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + auto unrenumbered_source = static_cast(configuration.source); + if (renumber) { + std::vector h_renumber_map_labels(d_renumber_map_labels.size()); + raft::update_host(h_renumber_map_labels.data(), + d_renumber_map_labels.data(), + d_renumber_map_labels.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + unrenumbered_source = h_renumber_map_labels[configuration.source]; + } + + std::vector h_reference_distances(unrenumbered_graph_view.get_number_of_vertices()); + std::vector h_reference_predecessors( + unrenumbered_graph_view.get_number_of_vertices()); + + bfs_reference(h_offsets.data(), + h_indices.data(), + h_reference_distances.data(), + h_reference_predecessors.data(), + unrenumbered_graph_view.get_number_of_vertices(), + unrenumbered_source, + std::numeric_limits::max()); + + std::vector h_cugraph_distances(graph_view.get_number_of_vertices()); + std::vector h_cugraph_predecessors(graph_view.get_number_of_vertices()); + if (renumber) { + cugraph::experimental::unrenumber_local_int_vertices(handle, + d_predecessors.data(), + d_predecessors.size(), + d_renumber_map_labels.data(), + vertex_t{0}, + graph_view.get_number_of_vertices(), + true); + + auto d_unrenumbered_distances = cugraph::test::sort_by_key( + handle, d_renumber_map_labels.data(), d_distances.data(), d_renumber_map_labels.size()); + auto d_unrenumbered_predecessors = cugraph::test::sort_by_key(handle, + d_renumber_map_labels.data(), + d_predecessors.data(), + 
d_renumber_map_labels.size()); + raft::update_host(h_cugraph_distances.data(), + d_unrenumbered_distances.data(), + d_unrenumbered_distances.size(), + handle.get_stream()); + raft::update_host(h_cugraph_predecessors.data(), + d_unrenumbered_predecessors.data(), + d_unrenumbered_predecessors.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); } else { - ASSERT_TRUE(h_reference_distances[*it] + 1 == h_reference_distances[i]) - << "distance to this vertex != distance to the predecessor vertex + 1."; - bool found{false}; - for (auto j = h_offsets[*it]; j < h_offsets[*it + 1]; ++j) { - if (h_indices[j] == i) { - found = true; - break; + raft::update_host( + h_cugraph_distances.data(), d_distances.data(), d_distances.size(), handle.get_stream()); + raft::update_host(h_cugraph_predecessors.data(), + d_predecessors.data(), + d_predecessors.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + } + + ASSERT_TRUE(std::equal( + h_reference_distances.begin(), h_reference_distances.end(), h_cugraph_distances.begin())) + << "distances do not match with the reference values."; + + for (auto it = h_cugraph_predecessors.begin(); it != h_cugraph_predecessors.end(); ++it) { + auto i = std::distance(h_cugraph_predecessors.begin(), it); + if (*it == cugraph::invalid_vertex_id::value) { + ASSERT_TRUE(h_reference_predecessors[i] == *it) + << "vertex reachability does not match with the reference."; + } else { + ASSERT_TRUE(h_reference_distances[*it] + 1 == h_reference_distances[i]) + << "distance to this vertex != distance to the predecessor vertex + 1."; + bool found{false}; + for (auto j = h_offsets[*it]; j < h_offsets[*it + 1]; ++j) { + if (h_indices[j] == i) { + found = true; + break; + } } + ASSERT_TRUE(found) << "no edge from the predecessor vertex to this vertex."; } - ASSERT_TRUE(found) << "no edge from the predecessor vertex to this vertex."; } } } @@ -221,12 +299,17 @@ INSTANTIATE_TEST_CASE_P( simple_test, Tests_BFS, ::testing::Values( + // enable correctness checks BFS_Usecase("test/datasets/karate.mtx", 0), BFS_Usecase("test/datasets/polbooks.mtx", 0), BFS_Usecase("test/datasets/netscience.mtx", 0), BFS_Usecase("test/datasets/netscience.mtx", 100), BFS_Usecase("test/datasets/wiki2003.mtx", 1000), BFS_Usecase("test/datasets/wiki-Talk.mtx", 1000), - BFS_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0))); + BFS_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0), + // disable correctness checks for large graphs + BFS_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, + 0, + false))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/coarsen_graph_test.cpp b/cpp/tests/experimental/coarsen_graph_test.cpp index 789619f2cd9..0fc0634bbbc 100644 --- a/cpp/tests/experimental/coarsen_graph_test.cpp +++ b/cpp/tests/experimental/coarsen_graph_test.cpp @@ -54,13 +54,14 @@ void check_coarsened_graph_results(edge_t* org_offsets, ASSERT_TRUE(std::count_if(org_indices, org_indices + org_offsets[num_org_vertices], [num_org_vertices](auto nbr) { - return !cugraph::test::is_valid_vertex(num_org_vertices, nbr); + return !cugraph::experimental::is_valid_vertex(num_org_vertices, nbr); }) == 0); ASSERT_TRUE(std::is_sorted(coarse_offsets, coarse_offsets + num_coarse_vertices)); ASSERT_TRUE(std::count_if(coarse_indices, coarse_indices + coarse_offsets[num_coarse_vertices], [num_coarse_vertices](auto nbr) { - return !cugraph::test::is_valid_vertex(num_coarse_vertices, 
nbr); + return !cugraph::experimental::is_valid_vertex(num_coarse_vertices, + nbr); }) == 0); ASSERT_TRUE(num_coarse_vertices <= num_org_vertices); diff --git a/cpp/tests/experimental/generate_rmat_test.cpp b/cpp/tests/experimental/generate_rmat_test.cpp index 666106d62ca..221accea4f7 100644 --- a/cpp/tests/experimental/generate_rmat_test.cpp +++ b/cpp/tests/experimental/generate_rmat_test.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -201,17 +202,19 @@ class Tests_GenerateRmat : public ::testing::TestWithParam (h_cugraph_srcs.size() == (size_t{1} << configuration.scale) * configuration.edge_factor) && (h_cugraph_dsts.size() == (size_t{1} << configuration.scale) * configuration.edge_factor)) << "Returned an invalid number of R-mat graph edges."; - ASSERT_TRUE( - std::count_if(h_cugraph_srcs.begin(), - h_cugraph_srcs.end(), - [num_vertices = static_cast(size_t{1} << configuration.scale)]( - auto v) { return !cugraph::test::is_valid_vertex(num_vertices, v); }) == 0) + ASSERT_TRUE(std::count_if(h_cugraph_srcs.begin(), + h_cugraph_srcs.end(), + [num_vertices = static_cast( + size_t{1} << configuration.scale)](auto v) { + return !cugraph::experimental::is_valid_vertex(num_vertices, v); + }) == 0) << "Returned R-mat graph edges have invalid source vertex IDs."; - ASSERT_TRUE( - std::count_if(h_cugraph_dsts.begin(), - h_cugraph_dsts.end(), - [num_vertices = static_cast(size_t{1} << configuration.scale)]( - auto v) { return !cugraph::test::is_valid_vertex(num_vertices, v); }) == 0) + ASSERT_TRUE(std::count_if(h_cugraph_dsts.begin(), + h_cugraph_dsts.end(), + [num_vertices = static_cast( + size_t{1} << configuration.scale)](auto v) { + return !cugraph::experimental::is_valid_vertex(num_vertices, v); + }) == 0) << "Returned R-mat graph edges have invalid destination vertex IDs."; if (!scramble) { diff --git a/cpp/tests/experimental/graph_test.cpp b/cpp/tests/experimental/graph_test.cpp index 949f6d2e08e..6ce32e0c836 100644 --- a/cpp/tests/experimental/graph_test.cpp +++ b/cpp/tests/experimental/graph_test.cpp @@ -139,7 +139,7 @@ class Tests_Graph : public ::testing::TestWithParam { handle, edgelist, number_of_vertices, - cugraph::experimental::graph_properties_t{is_symmetric, false}, + cugraph::experimental::graph_properties_t{is_symmetric, false, configuration.test_weighted}, false, true); diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index 776bb60716c..71011f3d018 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -16,9 +16,11 @@ #include #include +#include #include #include +#include #include #include @@ -34,6 +36,11 @@ #include #include +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + template void katz_centrality_reference(edge_t const* offsets, vertex_t const* indices, @@ -92,9 +99,12 @@ typedef struct KatzCentrality_Usecase_t { cugraph::test::input_graph_specifier_t input_graph_specifier{}; bool test_weighted{false}; + bool check_correctness{false}; - KatzCentrality_Usecase_t(std::string const& graph_file_path, bool test_weighted) - : test_weighted(test_weighted) + KatzCentrality_Usecase_t(std::string const& graph_file_path, + bool test_weighted, + bool check_correctness = true) + : test_weighted(test_weighted), check_correctness(check_correctness) { std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { @@ -107,15 +117,45 @@ typedef 
struct KatzCentrality_Usecase_t { }; KatzCentrality_Usecase_t(cugraph::test::rmat_params_t rmat_params, - double personalization_ratio, - bool test_weighted) - : test_weighted(test_weighted) + bool test_weighted, + bool check_correctness = true) + : test_weighted(test_weighted), check_correctness(check_correctness) { input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; input_graph_specifier.rmat_params = rmat_params; } } KatzCentrality_Usecase; +template +std::tuple, + rmm::device_uvector> +read_graph(raft::handle_t const& handle, KatzCentrality_Usecase const& configuration, bool renumber) +{ + return configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, + configuration.input_graph_specifier.graph_file_full_path, + configuration.test_weighted, + renumber) + : cugraph::test:: + generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + configuration.test_weighted, + renumber, + std::vector{0}, + size_t{1}); +} + class Tests_KatzCentrality : public ::testing::TestWithParam { public: Tests_KatzCentrality() {} @@ -128,76 +168,26 @@ class Tests_KatzCentrality : public ::testing::TestWithParam void run_current_test(KatzCentrality_Usecase const& configuration) { + constexpr bool renumber = true; + raft::handle_t handle{}; cugraph::experimental::graph_t graph(handle); - std::tie(graph, std::ignore) = - configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? 
cugraph::test:: - read_graph_from_matrix_market_file( - handle, - configuration.input_graph_specifier.graph_file_full_path, - configuration.test_weighted, - false) - : cugraph::test::generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - configuration.test_weighted, - false); + rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); + std::tie(graph, d_renumber_map_labels) = + read_graph(handle, configuration, renumber); auto graph_view = graph.view(); - std::vector h_offsets(graph_view.get_number_of_vertices() + 1); - std::vector h_indices(graph_view.get_number_of_edges()); - std::vector h_weights{}; - raft::update_host(h_offsets.data(), - graph_view.offsets(), - graph_view.get_number_of_vertices() + 1, - handle.get_stream()); - raft::update_host(h_indices.data(), - graph_view.indices(), - graph_view.get_number_of_edges(), - handle.get_stream()); - if (graph_view.is_weighted()) { - h_weights.assign(graph_view.get_number_of_edges(), weight_t{0.0}); - raft::update_host(h_weights.data(), - graph_view.weights(), - graph_view.get_number_of_edges(), - handle.get_stream()); - } - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - std::vector h_reference_katz_centralities(graph_view.get_number_of_vertices()); - - std::vector tmps(h_offsets.size()); - std::adjacent_difference(h_offsets.begin(), h_offsets.end(), tmps.begin()); - auto max_it = std::max_element(tmps.begin(), tmps.end()); + auto degrees = graph_view.compute_in_degrees(handle); + std::vector h_degrees(degrees.size()); + raft::update_host(h_degrees.data(), degrees.data(), degrees.size(), handle.get_stream()); + handle.get_stream_view().synchronize(); + auto max_it = std::max_element(h_degrees.begin(), h_degrees.end()); result_t const alpha = result_t{1.0} / static_cast(*max_it + 1); result_t constexpr beta{1.0}; result_t constexpr epsilon{1e-6}; - katz_centrality_reference( - h_offsets.data(), - h_indices.data(), - h_weights.size() > 0 ? 
h_weights.data() : static_cast(nullptr), - static_cast(nullptr), - h_reference_katz_centralities.data(), - graph_view.get_number_of_vertices(), - alpha, - beta, - epsilon, - std::numeric_limits::max(), - false, - true); - rmm::device_uvector d_katz_centralities(graph_view.get_number_of_vertices(), handle.get_stream()); @@ -206,39 +196,98 @@ class Tests_KatzCentrality : public ::testing::TestWithParam(nullptr), - d_katz_centralities.begin(), + d_katz_centralities.data(), alpha, beta, epsilon, std::numeric_limits::max(), false, - true, - false); + true); CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - std::vector h_cugraph_katz_centralities(graph_view.get_number_of_vertices()); - - raft::update_host(h_cugraph_katz_centralities.data(), - d_katz_centralities.data(), - d_katz_centralities.size(), - handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - auto threshold_ratio = 1e-3; - auto threshold_magnitude = - (1.0 / static_cast(graph_view.get_number_of_vertices())) * - threshold_ratio; // skip comparison for low Katz Centrality verties (lowly ranked vertices) - auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - return std::abs(lhs - rhs) < - std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); - }; - - ASSERT_TRUE(std::equal(h_reference_katz_centralities.begin(), - h_reference_katz_centralities.end(), - h_cugraph_katz_centralities.begin(), - nearly_equal)) - << "Katz centrality values do not match with the reference values."; + if (configuration.check_correctness) { + cugraph::experimental::graph_t unrenumbered_graph( + handle); + if (renumber) { + std::tie(unrenumbered_graph, std::ignore) = + read_graph(handle, configuration, false); + } + auto unrenumbered_graph_view = renumber ? unrenumbered_graph.view() : graph_view; + + std::vector h_offsets(unrenumbered_graph_view.get_number_of_vertices() + 1); + std::vector h_indices(unrenumbered_graph_view.get_number_of_edges()); + std::vector h_weights{}; + raft::update_host(h_offsets.data(), + unrenumbered_graph_view.offsets(), + unrenumbered_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + unrenumbered_graph_view.indices(), + unrenumbered_graph_view.get_number_of_edges(), + handle.get_stream()); + if (unrenumbered_graph_view.is_weighted()) { + h_weights.assign(unrenumbered_graph_view.get_number_of_edges(), weight_t{0.0}); + raft::update_host(h_weights.data(), + unrenumbered_graph_view.weights(), + unrenumbered_graph_view.get_number_of_edges(), + handle.get_stream()); + } + + handle.get_stream_view().synchronize(); + + std::vector h_reference_katz_centralities( + unrenumbered_graph_view.get_number_of_vertices()); + + katz_centrality_reference( + h_offsets.data(), + h_indices.data(), + h_weights.size() > 0 ? 
h_weights.data() : static_cast(nullptr), + static_cast(nullptr), + h_reference_katz_centralities.data(), + unrenumbered_graph_view.get_number_of_vertices(), + alpha, + beta, + epsilon, + std::numeric_limits::max(), + false, + true); + + std::vector h_cugraph_katz_centralities(graph_view.get_number_of_vertices()); + if (renumber) { + auto d_unrenumbered_katz_centralities = + cugraph::test::sort_by_key(handle, + d_renumber_map_labels.data(), + d_katz_centralities.data(), + d_renumber_map_labels.size()); + raft::update_host(h_cugraph_katz_centralities.data(), + d_unrenumbered_katz_centralities.data(), + d_unrenumbered_katz_centralities.size(), + handle.get_stream()); + } else { + raft::update_host(h_cugraph_katz_centralities.data(), + d_katz_centralities.data(), + d_katz_centralities.size(), + handle.get_stream()); + } + + handle.get_stream_view().synchronize(); + + auto threshold_ratio = 1e-3; + auto threshold_magnitude = + (1.0 / static_cast(graph_view.get_number_of_vertices())) * + threshold_ratio; // skip comparison for low Katz Centrality verties (lowly ranked vertices) + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + }; + + ASSERT_TRUE(std::equal(h_reference_katz_centralities.begin(), + h_reference_katz_centralities.end(), + h_cugraph_katz_centralities.begin(), + nearly_equal)) + << "Katz centrality values do not match with the reference values."; + } } }; @@ -252,6 +301,7 @@ INSTANTIATE_TEST_CASE_P( simple_test, Tests_KatzCentrality, ::testing::Values( + // enable correctness checks KatzCentrality_Usecase("test/datasets/karate.mtx", false), KatzCentrality_Usecase("test/datasets/karate.mtx", true), KatzCentrality_Usecase("test/datasets/web-Google.mtx", false), @@ -261,16 +311,15 @@ INSTANTIATE_TEST_CASE_P( KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", false), KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", true), KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - 0.0, - false), - KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - 0.5, false), KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - 0.0, true), - KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, - 0.5, - true))); + // disable correctness checks for large graphs + KatzCentrality_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, + false, + false), + KatzCentrality_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, + true, + false))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/mg_bfs_test.cpp b/cpp/tests/experimental/mg_bfs_test.cpp new file mode 100644 index 00000000000..76ccb5d9de3 --- /dev/null +++ b/cpp/tests/experimental/mg_bfs_test.cpp @@ -0,0 +1,303 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +typedef struct BFS_Usecase_t { + cugraph::test::input_graph_specifier_t input_graph_specifier{}; + + size_t source{0}; + bool check_correctness{false}; + + BFS_Usecase_t(std::string const& graph_file_path, size_t source, bool check_correctness = true) + : source(source), check_correctness(check_correctness) + { + std::string graph_file_full_path{}; + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; + input_graph_specifier.graph_file_full_path = graph_file_full_path; + }; + + BFS_Usecase_t(cugraph::test::rmat_params_t rmat_params, + size_t source, + bool check_correctness = true) + : source(source), check_correctness(check_correctness) + { + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; + input_graph_specifier.rmat_params = rmat_params; + } +} BFS_Usecase; + +template +std::tuple, + rmm::device_uvector> +read_graph(raft::handle_t const& handle, BFS_Usecase const& configuration, bool renumber) +{ + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + std::vector partition_ids(multi_gpu ? size_t{1} : static_cast(comm_size)); + std::iota(partition_ids.begin(), + partition_ids.end(), + multi_gpu ? static_cast(comm_rank) : size_t{0}); + + return configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.input_graph_specifier.graph_file_full_path, false, renumber) + : cugraph::test:: + generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + false, + renumber, + partition_ids, + static_cast(comm_size)); +} + +class Tests_MGBFS : public ::testing::TestWithParam { + public: + Tests_MGBFS() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of running BFS on multiple GPUs to that of a single-GPU run + template + void run_current_test(BFS_Usecase const& configuration) + { + using weight_t = float; + + // 1. initialize handle + + raft::handle_t handle{}; + + raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + auto row_comm_size = static_cast(sqrt(static_cast(comm_size))); + while (comm_size % row_comm_size != 0) { --row_comm_size; } + cugraph::partition_2d::subcomm_factory_t + subcomm_factory(handle, row_comm_size); + + // 2. 
create MG graph + + cugraph::experimental::graph_t mg_graph(handle); + rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); + std::tie(mg_graph, d_mg_renumber_map_labels) = + read_graph(handle, configuration, true); + + auto mg_graph_view = mg_graph.view(); + + ASSERT_TRUE(static_cast(configuration.source) >= 0 && + static_cast(configuration.source) < + mg_graph_view.get_number_of_vertices()) + << "Invalid starting source."; + + // 3. run MG BFS + + rmm::device_uvector d_mg_distances(mg_graph_view.get_number_of_local_vertices(), + handle.get_stream()); + rmm::device_uvector d_mg_predecessors(mg_graph_view.get_number_of_local_vertices(), + handle.get_stream()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + cugraph::experimental::bfs(handle, + mg_graph_view, + d_mg_distances.data(), + d_mg_predecessors.data(), + static_cast(configuration.source), + false, + std::numeric_limits::max(), + true); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + // 5. copmare SG & MG results + + if (configuration.check_correctness) { + // 5-1. create SG graph + + cugraph::experimental::graph_t sg_graph(handle); + std::tie(sg_graph, std::ignore) = + read_graph(handle, configuration, false); + + auto sg_graph_view = sg_graph.view(); + + std::vector vertex_partition_lasts(comm_size); + for (size_t i = 0; i < vertex_partition_lasts.size(); ++i) { + vertex_partition_lasts[i] = mg_graph_view.get_vertex_partition_last(i); + } + + rmm::device_scalar d_source(static_cast(configuration.source), + handle.get_stream()); + cugraph::experimental::unrenumber_int_vertices( + handle, + d_source.data(), + size_t{1}, + d_mg_renumber_map_labels.data(), + mg_graph_view.get_local_vertex_first(), + mg_graph_view.get_local_vertex_last(), + vertex_partition_lasts, + true); + auto unrenumbered_source = d_source.value(handle.get_stream()); + + // 5-2. run SG BFS + + rmm::device_uvector d_sg_distances(sg_graph_view.get_number_of_local_vertices(), + handle.get_stream()); + rmm::device_uvector d_sg_predecessors(sg_graph_view.get_number_of_local_vertices(), + handle.get_stream()); + + cugraph::experimental::bfs(handle, + sg_graph_view, + d_sg_distances.data(), + d_sg_predecessors.data(), + unrenumbered_source, + false, + std::numeric_limits::max(), + true); + + // 5-3. 
compare + + std::vector h_sg_offsets(sg_graph_view.get_number_of_vertices() + 1); + std::vector h_sg_indices(sg_graph_view.get_number_of_edges()); + raft::update_host(h_sg_offsets.data(), + sg_graph_view.offsets(), + sg_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_sg_indices.data(), + sg_graph_view.indices(), + sg_graph_view.get_number_of_edges(), + handle.get_stream()); + + std::vector h_sg_distances(sg_graph_view.get_number_of_vertices()); + std::vector h_sg_predecessors(sg_graph_view.get_number_of_vertices()); + raft::update_host( + h_sg_distances.data(), d_sg_distances.data(), d_sg_distances.size(), handle.get_stream()); + raft::update_host(h_sg_predecessors.data(), + d_sg_predecessors.data(), + d_sg_predecessors.size(), + handle.get_stream()); + + std::vector h_mg_distances(mg_graph_view.get_number_of_local_vertices()); + std::vector h_mg_predecessors(mg_graph_view.get_number_of_local_vertices()); + raft::update_host( + h_mg_distances.data(), d_mg_distances.data(), d_mg_distances.size(), handle.get_stream()); + cugraph::experimental::unrenumber_int_vertices( + handle, + d_mg_predecessors.data(), + d_mg_predecessors.size(), + d_mg_renumber_map_labels.data(), + mg_graph_view.get_local_vertex_first(), + mg_graph_view.get_local_vertex_last(), + vertex_partition_lasts, + true); + raft::update_host(h_mg_predecessors.data(), + d_mg_predecessors.data(), + d_mg_predecessors.size(), + handle.get_stream()); + + std::vector h_mg_renumber_map_labels(d_mg_renumber_map_labels.size()); + raft::update_host(h_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { + auto mapped_vertex = h_mg_renumber_map_labels[i]; + ASSERT_TRUE(h_mg_distances[i] == h_sg_distances[mapped_vertex]) + << "MG BFS distance for vertex: " << mapped_vertex << " in rank: " << comm_rank + << " has value: " << h_mg_distances[i] + << " different from the corresponding SG value: " << h_sg_distances[mapped_vertex]; + if (h_mg_predecessors[i] == cugraph::invalid_vertex_id::value) { + ASSERT_TRUE(h_sg_predecessors[mapped_vertex] == h_mg_predecessors[i]) + << "vertex reachability does not match with the SG result."; + } else { + ASSERT_TRUE(h_sg_distances[h_mg_predecessors[i]] + 1 == h_sg_distances[mapped_vertex]) + << "distances to this vertex != distances to the predecessor vertex + 1."; + bool found{false}; + for (auto j = h_sg_offsets[h_mg_predecessors[i]]; + j < h_sg_offsets[h_mg_predecessors[i] + 1]; + ++j) { + if (h_sg_indices[j] == mapped_vertex) { + found = true; + break; + } + } + ASSERT_TRUE(found) << "no edge from the predecessor vertex to this vertex."; + } + } + } + } +}; + +TEST_P(Tests_MGBFS, CheckInt32Int32) { run_current_test(GetParam()); } + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_MGBFS, + ::testing::Values( + // enable correctness checks + BFS_Usecase("test/datasets/karate.mtx", 0), + BFS_Usecase("test/datasets/web-Google.mtx", 0), + BFS_Usecase("test/datasets/ljournal-2008.mtx", 0), + BFS_Usecase("test/datasets/webbase-1M.mtx", 0), + BFS_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0), + // disable correctness checks for large graphs + BFS_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, + 0, + false))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/mg_katz_centrality_test.cpp 
b/cpp/tests/experimental/mg_katz_centrality_test.cpp new file mode 100644 index 00000000000..e3033af3771 --- /dev/null +++ b/cpp/tests/experimental/mg_katz_centrality_test.cpp @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +typedef struct KatzCentrality_Usecase_t { + cugraph::test::input_graph_specifier_t input_graph_specifier{}; + + bool test_weighted{false}; + bool check_correctness{false}; + + KatzCentrality_Usecase_t(std::string const& graph_file_path, + bool test_weighted, + bool check_correctness = true) + : test_weighted(test_weighted), check_correctness(check_correctness) + { + std::string graph_file_full_path{}; + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; + input_graph_specifier.graph_file_full_path = graph_file_full_path; + }; + + KatzCentrality_Usecase_t(cugraph::test::rmat_params_t rmat_params, + bool test_weighted, + bool check_correctness = true) + : test_weighted(test_weighted), check_correctness(check_correctness) + { + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; + input_graph_specifier.rmat_params = rmat_params; + } +} KatzCentrality_Usecase; + +template +std::tuple, + rmm::device_uvector> +read_graph(raft::handle_t const& handle, KatzCentrality_Usecase const& configuration, bool renumber) +{ + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + std::vector partition_ids(multi_gpu ? size_t{1} : static_cast(comm_size)); + std::iota(partition_ids.begin(), + partition_ids.end(), + multi_gpu ? static_cast(comm_rank) : size_t{0}); + + return configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? 
cugraph::test:: + read_graph_from_matrix_market_file( + handle, + configuration.input_graph_specifier.graph_file_full_path, + configuration.test_weighted, + renumber) + : cugraph::test:: + generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + configuration.test_weighted, + renumber, + partition_ids, + static_cast(comm_size)); +} + +class Tests_MGKatzCentrality : public ::testing::TestWithParam { + public: + Tests_MGKatzCentrality() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of running Katz Centrality on multiple GPUs to that of a single-GPU run + template + void run_current_test(KatzCentrality_Usecase const& configuration) + { + // 1. initialize handle + + raft::handle_t handle{}; + + raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + auto row_comm_size = static_cast(sqrt(static_cast(comm_size))); + while (comm_size % row_comm_size != 0) { --row_comm_size; } + cugraph::partition_2d::subcomm_factory_t + subcomm_factory(handle, row_comm_size); + + // 2. create MG graph + + cugraph::experimental::graph_t mg_graph(handle); + rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); + std::tie(mg_graph, d_mg_renumber_map_labels) = + read_graph(handle, configuration, true); + + auto mg_graph_view = mg_graph.view(); + + // 3. compute max in-degree + + auto max_in_degree = mg_graph_view.compute_max_in_degree(handle); + + // 4. run MG Katz Centrality + + result_t const alpha = result_t{1.0} / static_cast(max_in_degree + 1); + result_t constexpr beta{1.0}; + result_t constexpr epsilon{1e-6}; + + rmm::device_uvector d_mg_katz_centralities( + mg_graph_view.get_number_of_local_vertices(), handle.get_stream()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + cugraph::experimental::katz_centrality(handle, + mg_graph_view, + static_cast(nullptr), + d_mg_katz_centralities.data(), + alpha, + beta, + epsilon, + std::numeric_limits::max(), + false, + true); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + // 5. copmare SG & MG results + + if (configuration.check_correctness) { + // 5-1. create SG graph + + cugraph::experimental::graph_t sg_graph(handle); + std::tie(sg_graph, std::ignore) = + read_graph(handle, configuration, false); + + auto sg_graph_view = sg_graph.view(); + + // 5-3. run SG Katz Centrality + + rmm::device_uvector d_sg_katz_centralities(sg_graph_view.get_number_of_vertices(), + handle.get_stream()); + + cugraph::experimental::katz_centrality(handle, + sg_graph_view, + static_cast(nullptr), + d_sg_katz_centralities.data(), + alpha, + beta, + epsilon, + std::numeric_limits::max(), // max_iterations + false, + true); + + // 5-4. 
compare + + std::vector h_sg_katz_centralities(sg_graph_view.get_number_of_vertices()); + raft::update_host(h_sg_katz_centralities.data(), + d_sg_katz_centralities.data(), + d_sg_katz_centralities.size(), + handle.get_stream()); + + std::vector h_mg_katz_centralities(mg_graph_view.get_number_of_local_vertices()); + raft::update_host(h_mg_katz_centralities.data(), + d_mg_katz_centralities.data(), + d_mg_katz_centralities.size(), + handle.get_stream()); + + std::vector h_mg_renumber_map_labels(d_mg_renumber_map_labels.size()); + raft::update_host(h_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + auto threshold_ratio = 1e-3; + auto threshold_magnitude = + (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) * + threshold_ratio; // skip comparison for low KatzCentrality verties (lowly ranked vertices) + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + }; + + for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { + auto mapped_vertex = h_mg_renumber_map_labels[i]; + ASSERT_TRUE(nearly_equal(h_mg_katz_centralities[i], h_sg_katz_centralities[mapped_vertex])) + << "MG KatzCentrality value for vertex: " << mapped_vertex << " in rank: " << comm_rank + << " has value: " << h_mg_katz_centralities[i] + << " which exceeds the error margin for comparing to SG value: " + << h_sg_katz_centralities[mapped_vertex]; + } + } + } +}; + +TEST_P(Tests_MGKatzCentrality, CheckInt32Int32FloatFloat) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_MGKatzCentrality, + ::testing::Values( + // enable correctness checks + KatzCentrality_Usecase("test/datasets/karate.mtx", false), + KatzCentrality_Usecase("test/datasets/karate.mtx", true), + KatzCentrality_Usecase("test/datasets/web-Google.mtx", false), + KatzCentrality_Usecase("test/datasets/web-Google.mtx", true), + KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", false), + KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", true), + KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", false), + KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", true), + KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + false), + KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, + true), + // disable correctness checks for large graphs + KatzCentrality_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, + false, + false), + KatzCentrality_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, + true, + false))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/mg_sssp_test.cpp b/cpp/tests/experimental/mg_sssp_test.cpp new file mode 100644 index 00000000000..48e4dc869f4 --- /dev/null +++ b/cpp/tests/experimental/mg_sssp_test.cpp @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +typedef struct SSSP_Usecase_t { + cugraph::test::input_graph_specifier_t input_graph_specifier{}; + + size_t source{0}; + bool check_correctness{false}; + + SSSP_Usecase_t(std::string const& graph_file_path, size_t source, bool check_correctness = true) + : source(source), check_correctness(check_correctness) + { + std::string graph_file_full_path{}; + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH; + input_graph_specifier.graph_file_full_path = graph_file_full_path; + }; + + SSSP_Usecase_t(cugraph::test::rmat_params_t rmat_params, + size_t source, + bool check_correctness = true) + : source(source), check_correctness(check_correctness) + { + input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; + input_graph_specifier.rmat_params = rmat_params; + } +} SSSP_Usecase; + +template +std::tuple, + rmm::device_uvector> +read_graph(raft::handle_t const& handle, SSSP_Usecase const& configuration, bool renumber) +{ + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + std::vector partition_ids(multi_gpu ? size_t{1} : static_cast(comm_size)); + std::iota(partition_ids.begin(), + partition_ids.end(), + multi_gpu ? static_cast(comm_rank) : size_t{0}); + + return configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.input_graph_specifier.graph_file_full_path, true, renumber) + : cugraph::test:: + generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + true, + renumber, + partition_ids, + static_cast(comm_size)); +} + +class Tests_MGSSSP : public ::testing::TestWithParam { + public: + Tests_MGSSSP() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of running SSSP on multiple GPUs to that of a single-GPU run + template + void run_current_test(SSSP_Usecase const& configuration) + { + // 1. 
initialize handle + + raft::handle_t handle{}; + + raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + auto row_comm_size = static_cast(sqrt(static_cast(comm_size))); + while (comm_size % row_comm_size != 0) { --row_comm_size; } + cugraph::partition_2d::subcomm_factory_t + subcomm_factory(handle, row_comm_size); + + // 2. create MG graph + + cugraph::experimental::graph_t mg_graph(handle); + rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); + std::tie(mg_graph, d_mg_renumber_map_labels) = + read_graph(handle, configuration, true); + + auto mg_graph_view = mg_graph.view(); + + ASSERT_TRUE(static_cast(configuration.source) >= 0 && + static_cast(configuration.source) < + mg_graph_view.get_number_of_vertices()) + << "Invalid starting source."; + + // 3. run MG SSSP + + rmm::device_uvector d_mg_distances(mg_graph_view.get_number_of_local_vertices(), + handle.get_stream()); + rmm::device_uvector d_mg_predecessors(mg_graph_view.get_number_of_local_vertices(), + handle.get_stream()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + // FIXME: disable do_expensive_check + cugraph::experimental::sssp(handle, + mg_graph_view, + d_mg_distances.data(), + d_mg_predecessors.data(), + static_cast(configuration.source), + std::numeric_limits::max(), + true); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + // 5. copmare SG & MG results + + if (configuration.check_correctness) { + // 5-1. create SG graph + + cugraph::experimental::graph_t sg_graph(handle); + std::tie(sg_graph, std::ignore) = + read_graph(handle, configuration, false); + + auto sg_graph_view = sg_graph.view(); + + std::vector vertex_partition_lasts(comm_size); + for (size_t i = 0; i < vertex_partition_lasts.size(); ++i) { + vertex_partition_lasts[i] = mg_graph_view.get_vertex_partition_last(i); + } + + rmm::device_scalar d_source(static_cast(configuration.source), + handle.get_stream()); + cugraph::experimental::unrenumber_int_vertices( + handle, + d_source.data(), + size_t{1}, + d_mg_renumber_map_labels.data(), + mg_graph_view.get_local_vertex_first(), + mg_graph_view.get_local_vertex_last(), + vertex_partition_lasts, + true); + auto unrenumbered_source = d_source.value(handle.get_stream()); + + // 5-2. run SG SSSP + + rmm::device_uvector d_sg_distances(sg_graph_view.get_number_of_local_vertices(), + handle.get_stream()); + rmm::device_uvector d_sg_predecessors(sg_graph_view.get_number_of_local_vertices(), + handle.get_stream()); + + // FIXME: disable do_expensive_check + cugraph::experimental::sssp(handle, + sg_graph_view, + d_sg_distances.data(), + d_sg_predecessors.data(), + unrenumbered_source, + std::numeric_limits::max(), + true); + + // 5-3. 
compare + + std::vector h_sg_offsets(sg_graph_view.get_number_of_vertices() + 1); + std::vector h_sg_indices(sg_graph_view.get_number_of_edges()); + std::vector h_sg_weights(sg_graph_view.get_number_of_edges()); + raft::update_host(h_sg_offsets.data(), + sg_graph_view.offsets(), + sg_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_sg_indices.data(), + sg_graph_view.indices(), + sg_graph_view.get_number_of_edges(), + handle.get_stream()); + raft::update_host(h_sg_weights.data(), + sg_graph_view.weights(), + sg_graph_view.get_number_of_edges(), + handle.get_stream()); + + std::vector h_sg_distances(sg_graph_view.get_number_of_vertices()); + std::vector h_sg_predecessors(sg_graph_view.get_number_of_vertices()); + raft::update_host( + h_sg_distances.data(), d_sg_distances.data(), d_sg_distances.size(), handle.get_stream()); + raft::update_host(h_sg_predecessors.data(), + d_sg_predecessors.data(), + d_sg_predecessors.size(), + handle.get_stream()); + + std::vector h_mg_distances(mg_graph_view.get_number_of_local_vertices()); + std::vector h_mg_predecessors(mg_graph_view.get_number_of_local_vertices()); + raft::update_host( + h_mg_distances.data(), d_mg_distances.data(), d_mg_distances.size(), handle.get_stream()); + cugraph::experimental::unrenumber_int_vertices( + handle, + d_mg_predecessors.data(), + d_mg_predecessors.size(), + d_mg_renumber_map_labels.data(), + mg_graph_view.get_local_vertex_first(), + mg_graph_view.get_local_vertex_last(), + vertex_partition_lasts, + true); + raft::update_host(h_mg_predecessors.data(), + d_mg_predecessors.data(), + d_mg_predecessors.size(), + handle.get_stream()); + + std::vector h_mg_renumber_map_labels(d_mg_renumber_map_labels.size()); + raft::update_host(h_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + auto max_weight_element = std::max_element(h_sg_weights.begin(), h_sg_weights.end()); + auto epsilon = *max_weight_element * weight_t{1e-6}; + auto nearly_equal = [epsilon](auto lhs, auto rhs) { return std::fabs(lhs - rhs) < epsilon; }; + + for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { + auto mapped_vertex = h_mg_renumber_map_labels[i]; + ASSERT_TRUE(nearly_equal(h_mg_distances[i], h_sg_distances[mapped_vertex])) + << "MG SSSP distance for vertex: " << mapped_vertex << " in rank: " << comm_rank + << " has value: " << h_mg_distances[i] + << " different from the corresponding SG value: " << h_sg_distances[mapped_vertex]; + if (h_mg_predecessors[i] == cugraph::invalid_vertex_id::value) { + ASSERT_TRUE(h_sg_predecessors[mapped_vertex] == h_mg_predecessors[i]) + << "vertex reachability does not match with the SG result."; + } else { + auto pred_distance = h_sg_distances[h_mg_predecessors[i]]; + bool found{false}; + for (auto j = h_sg_offsets[h_mg_predecessors[i]]; + j < h_sg_offsets[h_mg_predecessors[i] + 1]; + ++j) { + if (h_sg_indices[j] == mapped_vertex) { + if (nearly_equal(pred_distance + h_sg_weights[j], h_sg_distances[mapped_vertex])) { + found = true; + break; + } + } + } + ASSERT_TRUE(found) + << "no edge from the predecessor vertex to this vertex with the matching weight."; + } + } + } + } +}; + +TEST_P(Tests_MGSSSP, CheckInt32Int32Float) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_MGSSSP, + ::testing::Values( + // enable correctness checks + SSSP_Usecase("test/datasets/karate.mtx", 0), + 
SSSP_Usecase("test/datasets/dblp.mtx", 0), + SSSP_Usecase("test/datasets/wiki2003.mtx", 1000), + SSSP_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0), + // disable correctness checks for large graphs + SSSP_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, + 0, + false))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index ff3b073cbc7..649fe11d805 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -16,9 +16,11 @@ #include #include +#include #include #include +#include #include #include @@ -35,6 +37,11 @@ #include #include +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + template void pagerank_reference(edge_t const* offsets, vertex_t const* indices, @@ -128,11 +135,15 @@ typedef struct PageRank_Usecase_t { double personalization_ratio{0.0}; bool test_weighted{false}; + bool check_correctness{false}; PageRank_Usecase_t(std::string const& graph_file_path, double personalization_ratio, - bool test_weighted) - : personalization_ratio(personalization_ratio), test_weighted(test_weighted) + bool test_weighted, + bool check_correctness = true) + : personalization_ratio(personalization_ratio), + test_weighted(test_weighted), + check_correctness(check_correctness) { std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { @@ -146,14 +157,47 @@ typedef struct PageRank_Usecase_t { PageRank_Usecase_t(cugraph::test::rmat_params_t rmat_params, double personalization_ratio, - bool test_weighted) - : personalization_ratio(personalization_ratio), test_weighted(test_weighted) + bool test_weighted, + bool check_correctness = true) + : personalization_ratio(personalization_ratio), + test_weighted(test_weighted), + check_correctness(check_correctness) { input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; input_graph_specifier.rmat_params = rmat_params; } } PageRank_Usecase; +template +std::tuple, + rmm::device_uvector> +read_graph(raft::handle_t const& handle, PageRank_Usecase const& configuration, bool renumber) +{ + return configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? 
cugraph::test:: + read_graph_from_matrix_market_file( + handle, + configuration.input_graph_specifier.graph_file_full_path, + configuration.test_weighted, + renumber) + : cugraph::test:: + generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + configuration.test_weighted, + renumber, + std::vector{0}, + size_t{1}); +} + class Tests_PageRank : public ::testing::TestWithParam { public: Tests_PageRank() {} @@ -166,52 +210,16 @@ class Tests_PageRank : public ::testing::TestWithParam { template void run_current_test(PageRank_Usecase const& configuration) { + constexpr bool renumber = true; + raft::handle_t handle{}; cugraph::experimental::graph_t graph(handle); - std::tie(graph, std::ignore) = - configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? cugraph::test:: - read_graph_from_matrix_market_file( - handle, - configuration.input_graph_specifier.graph_file_full_path, - configuration.test_weighted, - false) - : cugraph::test::generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - configuration.test_weighted, - false); + rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); + std::tie(graph, d_renumber_map_labels) = + read_graph(handle, configuration, renumber); auto graph_view = graph.view(); - std::vector h_offsets(graph_view.get_number_of_vertices() + 1); - std::vector h_indices(graph_view.get_number_of_edges()); - std::vector h_weights{}; - raft::update_host(h_offsets.data(), - graph_view.offsets(), - graph_view.get_number_of_vertices() + 1, - handle.get_stream()); - raft::update_host(h_indices.data(), - graph_view.indices(), - graph_view.get_number_of_edges(), - handle.get_stream()); - if (graph_view.is_weighted()) { - h_weights.assign(graph_view.get_number_of_edges(), weight_t{0.0}); - raft::update_host(h_weights.data(), - graph_view.weights(), - graph_view.get_number_of_edges(), - handle.get_stream()); - } - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - std::vector h_personalization_vertices{}; std::vector h_personalization_values{}; if (configuration.personalization_ratio > 0.0) { @@ -260,21 +268,6 @@ class Tests_PageRank : public ::testing::TestWithParam { result_t constexpr alpha{0.85}; result_t constexpr epsilon{1e-6}; - std::vector h_reference_pageranks(graph_view.get_number_of_vertices()); - - pagerank_reference(h_offsets.data(), - h_indices.data(), - h_weights.size() > 0 ? 
h_weights.data() : static_cast(nullptr), - h_personalization_vertices.data(), - h_personalization_values.data(), - h_reference_pageranks.data(), - graph_view.get_number_of_vertices(), - static_cast(h_personalization_vertices.size()), - alpha, - epsilon, - std::numeric_limits::max(), - false); - rmm::device_uvector d_pageranks(graph_view.get_number_of_vertices(), handle.get_stream()); @@ -286,7 +279,7 @@ class Tests_PageRank : public ::testing::TestWithParam { d_personalization_vertices.data(), d_personalization_values.data(), static_cast(d_personalization_vertices.size()), - d_pageranks.begin(), + d_pageranks.data(), alpha, epsilon, std::numeric_limits::max(), @@ -295,26 +288,129 @@ class Tests_PageRank : public ::testing::TestWithParam { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - std::vector h_cugraph_pageranks(graph_view.get_number_of_vertices()); - - raft::update_host( - h_cugraph_pageranks.data(), d_pageranks.data(), d_pageranks.size(), handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - auto threshold_ratio = 1e-3; - auto threshold_magnitude = - (1.0 / static_cast(graph_view.get_number_of_vertices())) * - threshold_ratio; // skip comparison for low PageRank verties (lowly ranked vertices) - auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - return std::abs(lhs - rhs) < - std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); - }; - - ASSERT_TRUE(std::equal(h_reference_pageranks.begin(), - h_reference_pageranks.end(), - h_cugraph_pageranks.begin(), - nearly_equal)) - << "PageRank values do not match with the reference values."; + if (configuration.check_correctness) { + cugraph::experimental::graph_t unrenumbered_graph( + handle); + if (renumber) { + std::tie(unrenumbered_graph, std::ignore) = + read_graph(handle, configuration, false); + } + auto unrenumbered_graph_view = renumber ? 
unrenumbered_graph.view() : graph_view; + + std::vector h_offsets(unrenumbered_graph_view.get_number_of_vertices() + 1); + std::vector h_indices(unrenumbered_graph_view.get_number_of_edges()); + std::vector h_weights{}; + raft::update_host(h_offsets.data(), + unrenumbered_graph_view.offsets(), + unrenumbered_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + unrenumbered_graph_view.indices(), + unrenumbered_graph_view.get_number_of_edges(), + handle.get_stream()); + if (unrenumbered_graph_view.is_weighted()) { + h_weights.assign(unrenumbered_graph_view.get_number_of_edges(), weight_t{0.0}); + raft::update_host(h_weights.data(), + unrenumbered_graph_view.weights(), + unrenumbered_graph_view.get_number_of_edges(), + handle.get_stream()); + } + + std::vector h_unrenumbered_personalization_vertices( + d_personalization_vertices.size()); + std::vector h_unrenumbered_personalization_values( + h_unrenumbered_personalization_vertices.size()); + if (renumber) { + rmm::device_uvector d_unrenumbered_personalization_vertices( + d_personalization_vertices.size(), handle.get_stream()); + rmm::device_uvector d_unrenumbered_personalization_values( + d_unrenumbered_personalization_vertices.size(), handle.get_stream()); + raft::copy_async(d_unrenumbered_personalization_vertices.data(), + d_personalization_vertices.data(), + d_personalization_vertices.size(), + handle.get_stream()); + raft::copy_async(d_unrenumbered_personalization_values.data(), + d_personalization_values.data(), + d_personalization_values.size(), + handle.get_stream()); + cugraph::experimental::unrenumber_local_int_vertices( + handle, + d_unrenumbered_personalization_vertices.data(), + d_unrenumbered_personalization_vertices.size(), + d_renumber_map_labels.data(), + vertex_t{0}, + graph_view.get_number_of_vertices()); + cugraph::test::sort_by_key(handle, + d_unrenumbered_personalization_vertices.data(), + d_unrenumbered_personalization_values.data(), + d_unrenumbered_personalization_vertices.size()); + + raft::update_host(h_unrenumbered_personalization_vertices.data(), + d_unrenumbered_personalization_vertices.data(), + d_unrenumbered_personalization_vertices.size(), + handle.get_stream()); + raft::update_host(h_unrenumbered_personalization_values.data(), + d_unrenumbered_personalization_values.data(), + d_unrenumbered_personalization_values.size(), + handle.get_stream()); + } else { + raft::update_host(h_unrenumbered_personalization_vertices.data(), + d_personalization_vertices.data(), + d_personalization_vertices.size(), + handle.get_stream()); + raft::update_host(h_unrenumbered_personalization_values.data(), + d_personalization_values.data(), + d_personalization_values.size(), + handle.get_stream()); + } + + handle.get_stream_view().synchronize(); + + std::vector h_reference_pageranks(unrenumbered_graph_view.get_number_of_vertices()); + + pagerank_reference(h_offsets.data(), + h_indices.data(), + h_weights.size() > 0 ? 
h_weights.data() : static_cast(nullptr), + h_unrenumbered_personalization_vertices.data(), + h_unrenumbered_personalization_values.data(), + h_reference_pageranks.data(), + unrenumbered_graph_view.get_number_of_vertices(), + static_cast(h_personalization_vertices.size()), + alpha, + epsilon, + std::numeric_limits::max(), + false); + + std::vector h_cugraph_pageranks(graph_view.get_number_of_vertices()); + if (renumber) { + auto d_unrenumbered_pageranks = cugraph::test::sort_by_key( + handle, d_renumber_map_labels.data(), d_pageranks.data(), d_renumber_map_labels.size()); + raft::update_host(h_cugraph_pageranks.data(), + d_unrenumbered_pageranks.data(), + d_unrenumbered_pageranks.size(), + handle.get_stream()); + } else { + raft::update_host( + h_cugraph_pageranks.data(), d_pageranks.data(), d_pageranks.size(), handle.get_stream()); + } + + handle.get_stream_view().synchronize(); + + auto threshold_ratio = 1e-3; + auto threshold_magnitude = + (1.0 / static_cast(graph_view.get_number_of_vertices())) * + threshold_ratio; // skip comparison for low PageRank verties (lowly ranked vertices) + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + }; + + ASSERT_TRUE(std::equal(h_reference_pageranks.begin(), + h_reference_pageranks.end(), + h_cugraph_pageranks.begin(), + nearly_equal)) + << "PageRank values do not match with the reference values."; + } } }; @@ -328,6 +424,7 @@ INSTANTIATE_TEST_CASE_P( simple_test, Tests_PageRank, ::testing::Values( + // enable correctness checks PageRank_Usecase("test/datasets/karate.mtx", 0.0, false), PageRank_Usecase("test/datasets/karate.mtx", 0.5, false), PageRank_Usecase("test/datasets/karate.mtx", 0.0, true), @@ -355,6 +452,15 @@ INSTANTIATE_TEST_CASE_P( true), PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0.5, - true))); + true), + // disable correctness checks for large graphs + PageRank_Usecase( + cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.0, false, false), + PageRank_Usecase( + cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.5, false, false), + PageRank_Usecase( + cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.0, true, false), + PageRank_Usecase( + cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.5, true, false))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/rw_low_level_test.cu b/cpp/tests/experimental/rw_low_level_test.cu index a32e258d366..8b562bc41f6 100644 --- a/cpp/tests/experimental/rw_low_level_test.cu +++ b/cpp/tests/experimental/rw_low_level_test.cu @@ -53,7 +53,8 @@ graph_t make_graph(raft::handle_t cons std::vector const& v_dst, std::vector const& v_w, vertex_t num_vertices, - edge_t num_edges) + edge_t num_edges, + bool is_weighted) { vector_test_t d_src(num_edges, handle.get_stream()); vector_test_t d_dst(num_edges, handle.get_stream()); @@ -67,7 +68,7 @@ graph_t make_graph(raft::handle_t cons d_src.data(), d_dst.data(), d_weights.data(), num_edges}; graph_t graph( - handle, edgelist, num_vertices, graph_properties_t{}, false); + handle, edgelist, num_vertices, graph_properties_t{false, false, is_weighted}, false); return graph; } @@ -119,7 +120,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphRWStart) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, 
v_dst, v_w, num_vertices, num_edges); + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -199,7 +200,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphCoalesceExperiments) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -275,7 +276,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphColExtraction) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -371,7 +372,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphRndGenColIndx) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -449,7 +450,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphUpdatePathSizes) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -521,7 +522,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphScatterUpdate) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -666,7 +667,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphCoalesceDefragment) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -741,7 +742,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphRandomWalk) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges); + auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); diff --git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp index 611abcb0d75..9364d261dec 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -16,9 +16,11 @@ #include #include +#include #include #include +#include #include #include @@ -28,12 +30,18 @@ #include +#include #include #include #include #include #include +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + // Dijkstra's algorithm template void sssp_reference(edge_t const* offsets, @@ -80,9 +88,12 @@ void sssp_reference(edge_t const* offsets, typedef struct SSSP_Usecase_t { cugraph::test::input_graph_specifier_t input_graph_specifier{}; - size_t source{false}; - SSSP_Usecase_t(std::string const& 
graph_file_path, size_t source) : source(source) + size_t source{0}; + bool check_correctness{false}; + + SSSP_Usecase_t(std::string const& graph_file_path, size_t source, bool check_correctness = true) + : source(source), check_correctness(check_correctness) { std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { @@ -94,13 +105,43 @@ typedef struct SSSP_Usecase_t { input_graph_specifier.graph_file_full_path = graph_file_full_path; }; - SSSP_Usecase_t(cugraph::test::rmat_params_t rmat_params, size_t source) : source(source) + SSSP_Usecase_t(cugraph::test::rmat_params_t rmat_params, + size_t source, + bool check_correctness = true) + : source(source), check_correctness(check_correctness) { input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::RMAT_PARAMS; input_graph_specifier.rmat_params = rmat_params; } } SSSP_Usecase; +template +std::tuple, + rmm::device_uvector> +read_graph(raft::handle_t const& handle, SSSP_Usecase const& configuration, bool renumber) +{ + return configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.input_graph_specifier.graph_file_full_path, true, renumber) + : cugraph::test:: + generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + true, + renumber, + std::vector{0}, + size_t{1}); +} + class Tests_SSSP : public ::testing::TestWithParam { public: Tests_SSSP() {} @@ -113,61 +154,18 @@ class Tests_SSSP : public ::testing::TestWithParam { template void run_current_test(SSSP_Usecase const& configuration) { + constexpr bool renumber = true; + raft::handle_t handle{}; cugraph::experimental::graph_t graph(handle); - std::tie(graph, std::ignore) = - configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? 
cugraph::test:: - read_graph_from_matrix_market_file( - handle, configuration.input_graph_specifier.graph_file_full_path, true, false) - : cugraph::test::generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - true, - false); + rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); + std::tie(graph, d_renumber_map_labels) = + read_graph(handle, configuration, renumber); auto graph_view = graph.view(); - std::vector h_offsets(graph_view.get_number_of_vertices() + 1); - std::vector h_indices(graph_view.get_number_of_edges()); - std::vector h_weights(graph_view.get_number_of_edges()); - raft::update_host(h_offsets.data(), - graph_view.offsets(), - graph_view.get_number_of_vertices() + 1, - handle.get_stream()); - raft::update_host(h_indices.data(), - graph_view.indices(), - graph_view.get_number_of_edges(), - handle.get_stream()); - raft::update_host(h_weights.data(), - graph_view.weights(), - graph_view.get_number_of_edges(), - handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - ASSERT_TRUE(configuration.source >= 0 && - configuration.source <= graph_view.get_number_of_vertices()) - << "Starting sources should be >= 0 and" - << " less than the number of vertices in the graph."; - - std::vector h_reference_distances(graph_view.get_number_of_vertices()); - std::vector h_reference_predecessors(graph_view.get_number_of_vertices()); - - sssp_reference(h_offsets.data(), - h_indices.data(), - h_weights.data(), - h_reference_distances.data(), - h_reference_predecessors.data(), - graph_view.get_number_of_vertices(), - static_cast(configuration.source)); + ASSERT_TRUE(static_cast(configuration.source) >= 0 && + static_cast(configuration.source) < graph_view.get_number_of_vertices()); rmm::device_uvector d_distances(graph_view.get_number_of_vertices(), handle.get_stream()); @@ -178,53 +176,135 @@ class Tests_SSSP : public ::testing::TestWithParam { cugraph::experimental::sssp(handle, graph_view, - d_distances.begin(), - d_predecessors.begin(), + d_distances.data(), + d_predecessors.data(), static_cast(configuration.source), std::numeric_limits::max(), false); CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - std::vector h_cugraph_distances(graph_view.get_number_of_vertices()); - std::vector h_cugraph_predecessors(graph_view.get_number_of_vertices()); - - raft::update_host( - h_cugraph_distances.data(), d_distances.data(), d_distances.size(), handle.get_stream()); - raft::update_host(h_cugraph_predecessors.data(), - d_predecessors.data(), - d_predecessors.size(), - handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - auto max_weight_element = std::max_element(h_weights.begin(), h_weights.end()); - auto epsilon = *max_weight_element * weight_t{1e-6}; - auto nearly_equal = [epsilon](auto lhs, auto rhs) { return std::fabs(lhs - rhs) < epsilon; }; - - ASSERT_TRUE(std::equal(h_reference_distances.begin(), - h_reference_distances.end(), - h_cugraph_distances.begin(), - nearly_equal)) - << "distances do not match with the reference values."; - - for (auto it = 
h_cugraph_predecessors.begin(); it != h_cugraph_predecessors.end(); ++it) { - auto i = std::distance(h_cugraph_predecessors.begin(), it); - if (*it == cugraph::invalid_vertex_id::value) { - ASSERT_TRUE(h_reference_predecessors[i] == *it) - << "vertex reachability do not match with the reference."; + if (configuration.check_correctness) { + cugraph::experimental::graph_t unrenumbered_graph( + handle); + if (renumber) { + std::tie(unrenumbered_graph, std::ignore) = + read_graph(handle, configuration, false); + } + auto unrenumbered_graph_view = renumber ? unrenumbered_graph.view() : graph_view; + + std::vector h_offsets(unrenumbered_graph_view.get_number_of_vertices() + 1); + std::vector h_indices(unrenumbered_graph_view.get_number_of_edges()); + std::vector h_weights(unrenumbered_graph_view.get_number_of_edges()); + raft::update_host(h_offsets.data(), + unrenumbered_graph_view.offsets(), + unrenumbered_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + unrenumbered_graph_view.indices(), + unrenumbered_graph_view.get_number_of_edges(), + handle.get_stream()); + raft::update_host(h_weights.data(), + unrenumbered_graph_view.weights(), + unrenumbered_graph_view.get_number_of_edges(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + auto unrenumbered_source = static_cast(configuration.source); + if (renumber) { + std::vector h_renumber_map_labels(d_renumber_map_labels.size()); + raft::update_host(h_renumber_map_labels.data(), + d_renumber_map_labels.data(), + d_renumber_map_labels.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + unrenumbered_source = h_renumber_map_labels[configuration.source]; + } + + std::vector h_reference_distances(unrenumbered_graph_view.get_number_of_vertices()); + std::vector h_reference_predecessors( + unrenumbered_graph_view.get_number_of_vertices()); + + sssp_reference(h_offsets.data(), + h_indices.data(), + h_weights.data(), + h_reference_distances.data(), + h_reference_predecessors.data(), + unrenumbered_graph_view.get_number_of_vertices(), + unrenumbered_source, + std::numeric_limits::max()); + + std::vector h_cugraph_distances(graph_view.get_number_of_vertices()); + std::vector h_cugraph_predecessors(graph_view.get_number_of_vertices()); + if (renumber) { + cugraph::experimental::unrenumber_local_int_vertices(handle, + d_predecessors.data(), + d_predecessors.size(), + d_renumber_map_labels.data(), + vertex_t{0}, + graph_view.get_number_of_vertices(), + true); + + auto d_unrenumbered_distances = cugraph::test::sort_by_key( + handle, d_renumber_map_labels.data(), d_distances.data(), d_renumber_map_labels.size()); + auto d_unrenumbered_predecessors = cugraph::test::sort_by_key(handle, + d_renumber_map_labels.data(), + d_predecessors.data(), + d_renumber_map_labels.size()); + + raft::update_host(h_cugraph_distances.data(), + d_unrenumbered_distances.data(), + d_unrenumbered_distances.size(), + handle.get_stream()); + raft::update_host(h_cugraph_predecessors.data(), + d_unrenumbered_predecessors.data(), + d_unrenumbered_predecessors.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); } else { - auto pred_distance = h_reference_distances[*it]; - bool found{false}; - for (auto j = h_offsets[*it]; j < h_offsets[*it + 1]; ++j) { - if (h_indices[j] == i) { - if (nearly_equal(pred_distance + h_weights[j], h_reference_distances[i])) { - found = true; - break; + raft::update_host( + h_cugraph_distances.data(), d_distances.data(), 
d_distances.size(), handle.get_stream()); + raft::update_host(h_cugraph_predecessors.data(), + d_predecessors.data(), + d_predecessors.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + } + + auto max_weight_element = std::max_element(h_weights.begin(), h_weights.end()); + auto epsilon = *max_weight_element * weight_t{1e-6}; + auto nearly_equal = [epsilon](auto lhs, auto rhs) { return std::fabs(lhs - rhs) < epsilon; }; + + ASSERT_TRUE(std::equal(h_reference_distances.begin(), + h_reference_distances.end(), + h_cugraph_distances.begin(), + nearly_equal)) + << "distances do not match with the reference values."; + + for (auto it = h_cugraph_predecessors.begin(); it != h_cugraph_predecessors.end(); ++it) { + auto i = std::distance(h_cugraph_predecessors.begin(), it); + if (*it == cugraph::invalid_vertex_id::value) { + ASSERT_TRUE(h_reference_predecessors[i] == *it) + << "vertex reachability do not match with the reference."; + } else { + auto pred_distance = h_reference_distances[*it]; + bool found{false}; + for (auto j = h_offsets[*it]; j < h_offsets[*it + 1]; ++j) { + if (h_indices[j] == i) { + if (nearly_equal(pred_distance + h_weights[j], h_reference_distances[i])) { + found = true; + break; + } } } + ASSERT_TRUE(found) + << "no edge from the predecessor vertex to this vertex with the matching weight."; } - ASSERT_TRUE(found) - << "no edge from the predecessor vertex to this vertex with the matching weight."; } } } @@ -237,9 +317,14 @@ INSTANTIATE_TEST_CASE_P( simple_test, Tests_SSSP, ::testing::Values( + // enable correctness checks SSSP_Usecase("test/datasets/karate.mtx", 0), SSSP_Usecase("test/datasets/dblp.mtx", 0), SSSP_Usecase("test/datasets/wiki2003.mtx", 1000), - SSSP_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0))); + SSSP_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0), + // disable correctness checks for large graphs + SSSP_Usecase(cugraph::test::rmat_params_t{20, 16, 0.57, 0.19, 0.19, 0, false, false}, + 0, + false))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/pagerank/mg_pagerank_test.cpp b/cpp/tests/pagerank/mg_pagerank_test.cpp index 85ee9a4243e..f7b1e8dfbb4 100644 --- a/cpp/tests/pagerank/mg_pagerank_test.cpp +++ b/cpp/tests/pagerank/mg_pagerank_test.cpp @@ -16,13 +16,19 @@ #include #include +#include #include +#include +#include +#include #include #include #include #include +#include +#include #include @@ -33,11 +39,15 @@ typedef struct PageRank_Usecase_t { double personalization_ratio{0.0}; bool test_weighted{false}; + bool check_correctness{false}; PageRank_Usecase_t(std::string const& graph_file_path, double personalization_ratio, - bool test_weighted) - : personalization_ratio(personalization_ratio), test_weighted(test_weighted) + bool test_weighted, + bool check_correctness = true) + : personalization_ratio(personalization_ratio), + test_weighted(test_weighted), + check_correctness(check_correctness) { std::string graph_file_full_path{}; if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { @@ -51,14 +61,56 @@ typedef struct PageRank_Usecase_t { PageRank_Usecase_t(cugraph::test::rmat_params_t rmat_params, double personalization_ratio, - bool test_weighted) - : personalization_ratio(personalization_ratio), test_weighted(test_weighted) + bool test_weighted, + bool check_correctness = true) + : personalization_ratio(personalization_ratio), + test_weighted(test_weighted), + check_correctness(check_correctness) { input_graph_specifier.tag = 
cugraph::test::input_graph_specifier_t::RMAT_PARAMS; input_graph_specifier.rmat_params = rmat_params; } } PageRank_Usecase; +template +std::tuple, + rmm::device_uvector> +read_graph(raft::handle_t const& handle, PageRank_Usecase const& configuration, bool renumber) +{ + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + std::vector partition_ids(multi_gpu ? size_t{1} : static_cast(comm_size)); + std::iota(partition_ids.begin(), + partition_ids.end(), + multi_gpu ? static_cast(comm_rank) : size_t{0}); + + return configuration.input_graph_specifier.tag == + cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH + ? cugraph::test:: + read_graph_from_matrix_market_file( + handle, + configuration.input_graph_specifier.graph_file_full_path, + configuration.test_weighted, + renumber) + : cugraph::test:: + generate_graph_from_rmat_params( + handle, + configuration.input_graph_specifier.rmat_params.scale, + configuration.input_graph_specifier.rmat_params.edge_factor, + configuration.input_graph_specifier.rmat_params.a, + configuration.input_graph_specifier.rmat_params.b, + configuration.input_graph_specifier.rmat_params.c, + configuration.input_graph_specifier.rmat_params.seed, + configuration.input_graph_specifier.rmat_params.undirected, + configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, + configuration.test_weighted, + renumber, + partition_ids, + static_cast(comm_size)); +} + class Tests_MGPageRank : public ::testing::TestWithParam { public: Tests_MGPageRank() {} @@ -68,7 +120,7 @@ class Tests_MGPageRank : public ::testing::TestWithParam { virtual void SetUp() {} virtual void TearDown() {} - // Compare the results of running pagerank on multiple GPUs to that of a single-GPU run + // Compare the results of running PageRank on multiple GPUs to that of a single-GPU run template void run_current_test(PageRank_Usecase const& configuration) { @@ -86,168 +138,40 @@ class Tests_MGPageRank : public ::testing::TestWithParam { cugraph::partition_2d::subcomm_factory_t subcomm_factory(handle, row_comm_size); - // 2. create SG & MG graphs - - cugraph::experimental::graph_t sg_graph(handle); - rmm::device_uvector d_sg_renumber_map_labels(0, handle.get_stream()); - std::tie(sg_graph, d_sg_renumber_map_labels) = - configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? cugraph::test:: - read_graph_from_matrix_market_file( - handle, - configuration.input_graph_specifier.graph_file_full_path, - configuration.test_weighted, - true) - : cugraph::test::generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - configuration.test_weighted, - true); - - auto sg_graph_view = sg_graph.view(); + // 2. create MG graph cugraph::experimental::graph_t mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = - configuration.input_graph_specifier.tag == - cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH - ? 
cugraph::test::read_graph_from_matrix_market_file( - handle, - configuration.input_graph_specifier.graph_file_full_path, - configuration.test_weighted, - true) - : cugraph::test::generate_graph_from_rmat_params( - handle, - configuration.input_graph_specifier.rmat_params.scale, - configuration.input_graph_specifier.rmat_params.edge_factor, - configuration.input_graph_specifier.rmat_params.a, - configuration.input_graph_specifier.rmat_params.b, - configuration.input_graph_specifier.rmat_params.c, - configuration.input_graph_specifier.rmat_params.seed, - configuration.input_graph_specifier.rmat_params.undirected, - configuration.input_graph_specifier.rmat_params.scramble_vertex_ids, - configuration.test_weighted, - true); + read_graph(handle, configuration, true); auto mg_graph_view = mg_graph.view(); - std::vector h_sg_renumber_map_labels(d_sg_renumber_map_labels.size()); - raft::update_host(h_sg_renumber_map_labels.data(), - d_sg_renumber_map_labels.data(), - d_sg_renumber_map_labels.size(), - handle.get_stream()); - - std::vector h_mg_renumber_map_labels(mg_graph_view.get_number_of_local_vertices()); - raft::update_host(h_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.size(), - handle.get_stream()); + // 3. generate personalization vertex/value pairs - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - // 2. generate personalization vertex/value pairs - - std::vector h_personalization_vertices{}; - std::vector h_personalization_values{}; + std::vector h_mg_personalization_vertices{}; + std::vector h_mg_personalization_values{}; if (configuration.personalization_ratio > 0.0) { - std::default_random_engine generator{}; + std::default_random_engine generator{ + static_cast(comm.get_rank()) /* seed */}; std::uniform_real_distribution distribution{0.0, 1.0}; - h_personalization_vertices.resize(sg_graph_view.get_number_of_vertices()); - std::iota(h_personalization_vertices.begin(), h_personalization_vertices.end(), vertex_t{0}); - h_personalization_vertices.erase( - std::remove_if(h_personalization_vertices.begin(), - h_personalization_vertices.end(), + h_mg_personalization_vertices.resize(mg_graph_view.get_number_of_local_vertices()); + std::iota(h_mg_personalization_vertices.begin(), + h_mg_personalization_vertices.end(), + mg_graph_view.get_local_vertex_first()); + h_mg_personalization_vertices.erase( + std::remove_if(h_mg_personalization_vertices.begin(), + h_mg_personalization_vertices.end(), [&generator, &distribution, configuration](auto v) { return distribution(generator) >= configuration.personalization_ratio; }), - h_personalization_vertices.end()); - h_personalization_values.resize(h_personalization_vertices.size()); - std::for_each(h_personalization_values.begin(), - h_personalization_values.end(), + h_mg_personalization_vertices.end()); + h_mg_personalization_values.resize(h_mg_personalization_vertices.size()); + std::for_each(h_mg_personalization_values.begin(), + h_mg_personalization_values.end(), [&distribution, &generator](auto& val) { val = distribution(generator); }); } - result_t constexpr alpha{0.85}; - result_t constexpr epsilon{1e-6}; - - // 3. 
run SG pagerank - - std::vector h_sg_personalization_vertices{}; - std::vector h_sg_personalization_values{}; - if (h_personalization_vertices.size() > 0) { - for (vertex_t i = 0; i < sg_graph_view.get_number_of_vertices(); ++i) { - auto it = std::lower_bound(h_personalization_vertices.begin(), - h_personalization_vertices.end(), - h_sg_renumber_map_labels[i]); - if (*it == h_sg_renumber_map_labels[i]) { - h_sg_personalization_vertices.push_back(i); - h_sg_personalization_values.push_back( - h_personalization_values[std::distance(h_personalization_vertices.begin(), it)]); - } - } - } - - rmm::device_uvector d_sg_personalization_vertices( - h_sg_personalization_vertices.size(), handle.get_stream()); - rmm::device_uvector d_sg_personalization_values(d_sg_personalization_vertices.size(), - handle.get_stream()); - if (d_sg_personalization_vertices.size() > 0) { - raft::update_device(d_sg_personalization_vertices.data(), - h_sg_personalization_vertices.data(), - h_sg_personalization_vertices.size(), - handle.get_stream()); - raft::update_device(d_sg_personalization_values.data(), - h_sg_personalization_values.data(), - h_sg_personalization_values.size(), - handle.get_stream()); - } - - rmm::device_uvector d_sg_pageranks(sg_graph_view.get_number_of_vertices(), - handle.get_stream()); - - cugraph::experimental::pagerank(handle, - sg_graph_view, - static_cast(nullptr), - d_sg_personalization_vertices.data(), - d_sg_personalization_values.data(), - static_cast(d_sg_personalization_vertices.size()), - d_sg_pageranks.begin(), - alpha, - epsilon, - std::numeric_limits::max(), // max_iterations - false, - false); - - std::vector h_sg_pageranks(sg_graph_view.get_number_of_vertices()); - raft::update_host( - h_sg_pageranks.data(), d_sg_pageranks.data(), d_sg_pageranks.size(), handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - - // 4. run MG pagerank - - std::vector h_mg_personalization_vertices{}; - std::vector h_mg_personalization_values{}; - if (h_personalization_vertices.size() > 0) { - for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { - auto it = std::lower_bound(h_personalization_vertices.begin(), - h_personalization_vertices.end(), - h_mg_renumber_map_labels[i]); - if (*it == h_mg_renumber_map_labels[i]) { - h_mg_personalization_vertices.push_back(mg_graph_view.get_local_vertex_first() + i); - h_mg_personalization_values.push_back( - h_personalization_values[std::distance(h_personalization_vertices.begin(), it)]); - } - } - } - rmm::device_uvector d_mg_personalization_vertices( h_mg_personalization_vertices.size(), handle.get_stream()); rmm::device_uvector d_mg_personalization_values(d_mg_personalization_vertices.size(), @@ -263,6 +187,11 @@ class Tests_MGPageRank : public ::testing::TestWithParam { handle.get_stream()); } + // 4. 
run MG PageRank + + result_t constexpr alpha{0.85}; + result_t constexpr epsilon{1e-6}; + rmm::device_uvector d_mg_pageranks(mg_graph_view.get_number_of_local_vertices(), handle.get_stream()); @@ -274,44 +203,145 @@ class Tests_MGPageRank : public ::testing::TestWithParam { d_mg_personalization_vertices.data(), d_mg_personalization_values.data(), static_cast(d_mg_personalization_vertices.size()), - d_mg_pageranks.begin(), + d_mg_pageranks.data(), alpha, epsilon, std::numeric_limits::max(), - false, false); CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - std::vector h_mg_pageranks(mg_graph_view.get_number_of_local_vertices()); - raft::update_host( - h_mg_pageranks.data(), d_mg_pageranks.data(), d_mg_pageranks.size(), handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - // 5. copmare SG & MG results - std::vector h_sg_shuffled_pageranks(sg_graph_view.get_number_of_vertices(), - result_t{0.0}); - for (size_t i = 0; i < h_sg_pageranks.size(); ++i) { - h_sg_shuffled_pageranks[h_sg_renumber_map_labels[i]] = h_sg_pageranks[i]; - } + if (configuration.check_correctness) { + // 5-1. create SG graph + + cugraph::experimental::graph_t sg_graph(handle); + std::tie(sg_graph, std::ignore) = + read_graph(handle, configuration, false); + + auto sg_graph_view = sg_graph.view(); + + // 5-2. collect personalization vertex/value pairs + + rmm::device_uvector d_sg_personalization_vertices(0, handle.get_stream()); + rmm::device_uvector d_sg_personalization_values(0, handle.get_stream()); + if (configuration.personalization_ratio > 0.0) { + rmm::device_uvector d_unrenumbered_personalization_vertices( + d_mg_personalization_vertices.size(), handle.get_stream()); + rmm::device_uvector d_unrenumbered_personalization_values( + d_unrenumbered_personalization_vertices.size(), handle.get_stream()); + raft::copy_async(d_unrenumbered_personalization_vertices.data(), + d_mg_personalization_vertices.data(), + d_mg_personalization_vertices.size(), + handle.get_stream()); + raft::copy_async(d_unrenumbered_personalization_values.data(), + d_mg_personalization_values.data(), + d_mg_personalization_values.size(), + handle.get_stream()); + + std::vector vertex_partition_lasts(comm_size); + for (size_t i = 0; i < vertex_partition_lasts.size(); ++i) { + vertex_partition_lasts[i] = mg_graph_view.get_vertex_partition_last(i); + } + cugraph::experimental::unrenumber_int_vertices( + handle, + d_unrenumbered_personalization_vertices.data(), + d_unrenumbered_personalization_vertices.size(), + d_mg_renumber_map_labels.data(), + mg_graph_view.get_local_vertex_first(), + mg_graph_view.get_local_vertex_last(), + vertex_partition_lasts, + handle.get_stream()); + + rmm::device_scalar d_local_personalization_vector_size( + d_unrenumbered_personalization_vertices.size(), handle.get_stream()); + rmm::device_uvector d_recvcounts(comm_size, handle.get_stream()); + comm.allgather( + d_local_personalization_vector_size.data(), d_recvcounts.data(), 1, handle.get_stream()); + std::vector recvcounts(d_recvcounts.size()); + raft::update_host( + recvcounts.data(), d_recvcounts.data(), d_recvcounts.size(), handle.get_stream()); + auto status = comm.sync_stream(handle.get_stream()); + ASSERT_EQ(status, raft::comms::status_t::SUCCESS); + + std::vector displacements(recvcounts.size(), size_t{0}); + std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); + + d_sg_personalization_vertices.resize(displacements.back() + recvcounts.back(), + handle.get_stream()); + 
d_sg_personalization_values.resize(d_sg_personalization_vertices.size(), + handle.get_stream()); + + comm.allgatherv(d_unrenumbered_personalization_vertices.data(), + d_sg_personalization_vertices.data(), + recvcounts.data(), + displacements.data(), + handle.get_stream()); + comm.allgatherv(d_unrenumbered_personalization_values.data(), + d_sg_personalization_values.data(), + recvcounts.data(), + displacements.data(), + handle.get_stream()); + + cugraph::test::sort_by_key(handle, + d_unrenumbered_personalization_vertices.data(), + d_unrenumbered_personalization_values.data(), + d_unrenumbered_personalization_vertices.size()); + } + + // 5-3. run SG PageRank + + rmm::device_uvector d_sg_pageranks(sg_graph_view.get_number_of_vertices(), + handle.get_stream()); + + cugraph::experimental::pagerank(handle, + sg_graph_view, + static_cast(nullptr), + d_sg_personalization_vertices.data(), + d_sg_personalization_values.data(), + static_cast(d_sg_personalization_vertices.size()), + d_sg_pageranks.data(), + alpha, + epsilon, + std::numeric_limits::max(), // max_iterations + false); + + // 5-4. compare + + std::vector h_sg_pageranks(sg_graph_view.get_number_of_vertices()); + raft::update_host( + h_sg_pageranks.data(), d_sg_pageranks.data(), d_sg_pageranks.size(), handle.get_stream()); + + std::vector h_mg_pageranks(mg_graph_view.get_number_of_local_vertices()); + raft::update_host( + h_mg_pageranks.data(), d_mg_pageranks.data(), d_mg_pageranks.size(), handle.get_stream()); + + std::vector h_mg_renumber_map_labels(d_mg_renumber_map_labels.size()); + raft::update_host(h_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.data(), + d_mg_renumber_map_labels.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + auto threshold_ratio = 1e-3; + auto threshold_magnitude = + (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) * + threshold_ratio; // skip comparison for low PageRank verties (lowly ranked vertices) + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + }; - auto threshold_ratio = 1e-3; - auto threshold_magnitude = - (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) * - threshold_ratio; // skip comparison for low PageRank verties (lowly ranked vertices) - auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - return std::abs(lhs - rhs) < - std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); - }; - - for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { - auto mapped_vertex = h_mg_renumber_map_labels[i]; - ASSERT_TRUE(nearly_equal(h_mg_pageranks[i], h_sg_shuffled_pageranks[mapped_vertex])) - << "MG PageRank value for vertex: " << i << " in rank: " << comm_rank - << " has value: " << h_mg_pageranks[i] - << " which exceeds the error margin for comparing to SG value: " - << h_sg_shuffled_pageranks[mapped_vertex]; + for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { + auto mapped_vertex = h_mg_renumber_map_labels[i]; + ASSERT_TRUE(nearly_equal(h_mg_pageranks[i], h_sg_pageranks[mapped_vertex])) + << "MG PageRank value for vertex: " << mapped_vertex << " in rank: " << comm_rank + << " has value: " << h_mg_pageranks[i] + << " which exceeds the error margin for comparing to SG value: " + << h_sg_pageranks[mapped_vertex]; + } } } }; @@ -325,6 +355,7 @@ INSTANTIATE_TEST_CASE_P( simple_test, Tests_MGPageRank, ::testing::Values( + // 
enable correctness checks PageRank_Usecase("test/datasets/karate.mtx", 0.0, false), PageRank_Usecase("test/datasets/karate.mtx", 0.5, false), PageRank_Usecase("test/datasets/karate.mtx", 0.0, true), @@ -352,6 +383,15 @@ INSTANTIATE_TEST_CASE_P( true), PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0.5, - true))); + true), + // disable correctness checks for large graphs + PageRank_Usecase( + cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.0, false, false), + PageRank_Usecase( + cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.5, false, false), + PageRank_Usecase( + cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.0, true, false), + PageRank_Usecase( + cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0.5, true, false))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/generate_graph_from_edgelist.cu b/cpp/tests/utilities/generate_graph_from_edgelist.cu index 1b9fe6051f7..a9df392d2fb 100644 --- a/cpp/tests/utilities/generate_graph_from_edgelist.cu +++ b/cpp/tests/utilities/generate_graph_from_edgelist.cu @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -28,7 +29,7 @@ namespace cugraph { namespace test { -namespace detail { +namespace { template , rmm::device_uvector>> -generate_graph_from_edgelist(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& edgelist_rows, - rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, - bool renumber) +generate_graph_from_edgelist_impl(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber) { CUGRAPH_EXPECTS(renumber, "renumber should be true if multi_gpu is true."); @@ -59,95 +60,88 @@ generate_graph_from_edgelist(raft::handle_t const& handle, auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_size = col_comm.get_size(); - vertex_t number_of_vertices = static_cast(vertices.size()); - - auto vertex_key_func = - cugraph::experimental::detail::compute_gpu_id_from_vertex_t{comm_size}; - vertices.resize(thrust::distance(vertices.begin(), - thrust::remove_if( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertices.begin(), - vertices.end(), - [comm_rank, key_func = vertex_key_func] __device__(auto val) { - return key_func(val) != comm_rank; - })), - handle.get_stream()); - vertices.shrink_to_fit(handle.get_stream()); - - auto edge_key_func = cugraph::experimental::detail::compute_gpu_id_from_edge_t{ - false, comm_size, row_comm_size, col_comm_size}; - size_t number_of_local_edges{}; - if (test_weighted) { - auto edge_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin(), edgelist_weights.begin())); - number_of_local_edges = thrust::distance( - edge_first, - thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edge_first, - edge_first + edgelist_rows.size(), - [comm_rank, key_func = edge_key_func] __device__(auto e) { - auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); - auto minor = store_transposed ? 
thrust::get<0>(e) : thrust::get<1>(e); - return key_func(major, minor) != comm_rank; - })); - } else { - auto edge_first = - thrust::make_zip_iterator(thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin())); - number_of_local_edges = thrust::distance( - edge_first, - thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edge_first, - edge_first + edgelist_rows.size(), - [comm_rank, key_func = edge_key_func] __device__(auto e) { - auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); - auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); - return key_func(major, minor) != comm_rank; - })); - } - - edgelist_rows.resize(number_of_local_edges, handle.get_stream()); - edgelist_rows.shrink_to_fit(handle.get_stream()); - edgelist_cols.resize(number_of_local_edges, handle.get_stream()); - edgelist_cols.shrink_to_fit(handle.get_stream()); - if (test_weighted) { - edgelist_weights.resize(number_of_local_edges, handle.get_stream()); - edgelist_weights.shrink_to_fit(handle.get_stream()); - } + auto local_partition_id_op = + [comm_size, + key_func = cugraph::experimental::detail::compute_partition_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto pair) { + return key_func(thrust::get<0>(pair), thrust::get<1>(pair)) / + comm_size; // global partition id to local partition id + }; + auto pair_first = + store_transposed + ? thrust::make_zip_iterator(thrust::make_tuple(edgelist_cols.begin(), edgelist_rows.begin())) + : thrust::make_zip_iterator(thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin())); + auto edge_counts = test_weighted + ? cugraph::experimental::groupby_and_count(pair_first, + pair_first + edgelist_rows.size(), + edgelist_weights.begin(), + local_partition_id_op, + col_comm_size, + handle.get_stream()) + : cugraph::experimental::groupby_and_count(pair_first, + pair_first + edgelist_rows.size(), + local_partition_id_op, + col_comm_size, + handle.get_stream()); + + std::vector h_edge_counts(edge_counts.size()); + raft::update_host( + h_edge_counts.data(), edge_counts.data(), edge_counts.size(), handle.get_stream()); + handle.get_stream_view().synchronize(); + + std::vector h_displacements(h_edge_counts.size(), size_t{0}); + std::partial_sum(h_edge_counts.begin(), h_edge_counts.end() - 1, h_displacements.begin() + 1); // 3. renumber rmm::device_uvector renumber_map_labels(0, handle.get_stream()); cugraph::experimental::partition_t partition{}; - vertex_t aggregate_number_of_vertices{}; + vertex_t number_of_vertices{}; edge_t number_of_edges{}; - // FIXME: set do_expensive_check to false once validated - std::tie(renumber_map_labels, partition, aggregate_number_of_vertices, number_of_edges) = - cugraph::experimental::renumber_edgelist( - handle, - vertices.data(), - static_cast(vertices.size()), - store_transposed ? edgelist_cols.data() : edgelist_rows.data(), - store_transposed ? edgelist_rows.data() : edgelist_cols.data(), - edgelist_rows.size(), - false, - true); - assert(aggregate_number_of_vertices == number_of_vertices); + { + std::vector major_ptrs(h_edge_counts.size()); + std::vector minor_ptrs(major_ptrs.size()); + std::vector counts(major_ptrs.size()); + for (size_t i = 0; i < h_edge_counts.size(); ++i) { + major_ptrs[i] = + (store_transposed ? edgelist_cols.begin() : edgelist_rows.begin()) + h_displacements[i]; + minor_ptrs[i] = + (store_transposed ? 
edgelist_rows.begin() : edgelist_cols.begin()) + h_displacements[i]; + counts[i] = static_cast(h_edge_counts[i]); + } + // FIXME: set do_expensive_check to false once validated + std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = + cugraph::experimental::renumber_edgelist( + handle, + vertices.data(), + static_cast(vertices.size()), + major_ptrs, + minor_ptrs, + counts, + true); + } // 4. create a graph + std::vector> edgelists( + h_edge_counts.size()); + for (size_t i = 0; i < h_edge_counts.size(); ++i) { + edgelists[i] = cugraph::experimental::edgelist_t{ + edgelist_rows.data() + h_displacements[i], + edgelist_cols.data() + h_displacements[i], + test_weighted ? edgelist_weights.data() + h_displacements[i] + : static_cast(nullptr), + static_cast(h_edge_counts[i])}; + } + return std::make_tuple( cugraph::experimental::graph_t( handle, - std::vector>{ - cugraph::experimental::edgelist_t{ - edgelist_rows.data(), - edgelist_cols.data(), - test_weighted ? edgelist_weights.data() : nullptr, - static_cast(edgelist_rows.size())}}, + edgelists, partition, number_of_vertices, number_of_edges, - cugraph::experimental::graph_properties_t{is_symmetric, false}, + cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, true, true), std::move(renumber_map_labels)); @@ -163,14 +157,14 @@ std::enable_if_t< std::tuple< cugraph::experimental::graph_t, rmm::device_uvector>> -generate_graph_from_edgelist(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& edgelist_rows, - rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, - bool renumber) +generate_graph_from_edgelist_impl(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber) { vertex_t number_of_vertices = static_cast(vertices.size()); @@ -196,13 +190,13 @@ generate_graph_from_edgelist(raft::handle_t const& handle, test_weighted ? edgelist_weights.data() : nullptr, static_cast(edgelist_rows.size())}, number_of_vertices, - cugraph::experimental::graph_properties_t{is_symmetric, false}, + cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, renumber ? true : false, true), std::move(renumber_map_labels)); } -} // namespace detail +} // namespace template ( - handle, - std::move(vertices), - std::move(edgelist_rows), - std::move(edgelist_cols), - std::move(edgelist_weights), - is_symmetric, - test_weighted, - renumber); + return generate_graph_from_edgelist_impl( + handle, + std::move(vertices), + std::move(edgelist_rows), + std::move(edgelist_cols), + std::move(edgelist_weights), + is_symmetric, + test_weighted, + renumber); } // explicit instantiations diff --git a/cpp/tests/utilities/matrix_market_file_utilities.cu b/cpp/tests/utilities/matrix_market_file_utilities.cu index ddbbac603ee..bf7539864be 100644 --- a/cpp/tests/utilities/matrix_market_file_utilities.cu +++ b/cpp/tests/utilities/matrix_market_file_utilities.cu @@ -13,9 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include +#include #include +#include #include #include @@ -339,7 +342,73 @@ read_graph_from_matrix_market_file(raft::handle_t const& handle, d_vertices.begin(), d_vertices.end(), vertex_t{0}); + handle.get_stream_view().synchronize(); + + if (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + + auto vertex_key_func = + cugraph::experimental::detail::compute_gpu_id_from_vertex_t{comm_size}; + d_vertices.resize( + thrust::distance( + d_vertices.begin(), + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_vertices.begin(), + d_vertices.end(), + [comm_rank, key_func = vertex_key_func] __device__(auto val) { + return key_func(val) != comm_rank; + })), + handle.get_stream()); + d_vertices.shrink_to_fit(handle.get_stream()); + + auto edge_key_func = cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}; + size_t number_of_local_edges{}; + if (test_weighted) { + auto edge_first = thrust::make_zip_iterator(thrust::make_tuple( + d_edgelist_rows.begin(), d_edgelist_cols.begin(), d_edgelist_weights.begin())); + number_of_local_edges = thrust::distance( + edge_first, + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + d_edgelist_rows.size(), + [comm_rank, key_func = edge_key_func] __device__(auto e) { + auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); + auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); + return key_func(major, minor) != comm_rank; + })); + } else { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(d_edgelist_rows.begin(), d_edgelist_cols.begin())); + number_of_local_edges = thrust::distance( + edge_first, + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + d_edgelist_rows.size(), + [comm_rank, key_func = edge_key_func] __device__(auto e) { + auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e); + auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e); + return key_func(major, minor) != comm_rank; + })); + } + + d_edgelist_rows.resize(number_of_local_edges, handle.get_stream()); + d_edgelist_rows.shrink_to_fit(handle.get_stream()); + d_edgelist_cols.resize(number_of_local_edges, handle.get_stream()); + d_edgelist_cols.shrink_to_fit(handle.get_stream()); + if (test_weighted) { + d_edgelist_weights.resize(number_of_local_edges, handle.get_stream()); + d_edgelist_weights.shrink_to_fit(handle.get_stream()); + } + } + handle.get_stream_view().synchronize(); return generate_graph_from_edgelist( handle, std::move(d_vertices), diff --git a/cpp/tests/utilities/rmat_utilities.cu b/cpp/tests/utilities/rmat_utilities.cu index 16ea7a486fc..3f0bb0b4a1f 100644 --- a/cpp/tests/utilities/rmat_utilities.cu +++ b/cpp/tests/utilities/rmat_utilities.cu @@ -13,10 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include +#include #include +#include #include +#include #include #include @@ -41,39 +45,191 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, double a, double b, double c, - uint64_t seed, + uint64_t base_seed, bool undirected, bool scramble_vertex_ids, bool test_weighted, - bool renumber) + bool renumber, + std::vector const& partition_ids, + size_t num_partitions) { + CUGRAPH_EXPECTS(!multi_gpu || renumber, "renumber should be true if multi_gpu is true."); + CUGRAPH_EXPECTS(size_t{1} << scale <= static_cast(std::numeric_limits::max()), + "vertex_t overflow."); + CUGRAPH_EXPECTS( + (size_t{1} << scale) * edge_factor <= static_cast(std::numeric_limits::max()), + " edge_t overflow."); + + vertex_t number_of_vertices = static_cast(size_t{1} << scale); + edge_t number_of_edges = + static_cast(static_cast(number_of_vertices) * edge_factor); + + std::vector partition_edge_counts(partition_ids.size()); + std::vector partition_vertex_firsts(partition_ids.size()); + std::vector partition_vertex_lasts(partition_ids.size()); + for (size_t i = 0; i < partition_ids.size(); ++i) { + auto id = partition_ids[i]; + + partition_edge_counts[i] = number_of_edges / num_partitions + + (id < number_of_edges % num_partitions ? edge_t{1} : edge_t{0}); + + partition_vertex_firsts[i] = (number_of_vertices / num_partitions) * id; + partition_vertex_lasts[i] = (number_of_vertices / num_partitions) * (id + 1); + if (id < number_of_vertices % num_partitions) { + partition_vertex_firsts[i] += id; + partition_vertex_lasts[i] += id + 1; + } else { + partition_vertex_firsts[i] += number_of_vertices % num_partitions; + partition_vertex_lasts[i] += number_of_vertices % num_partitions; + } + } + rmm::device_uvector d_edgelist_rows(0, handle.get_stream()); rmm::device_uvector d_edgelist_cols(0, handle.get_stream()); - std::tie(d_edgelist_rows, d_edgelist_cols) = - cugraph::experimental::generate_rmat_edgelist( - handle, scale, edge_factor, a, b, c, seed, undirected ? true : false, scramble_vertex_ids); + rmm::device_uvector d_edgelist_weights(0, handle.get_stream()); + for (size_t i = 0; i < partition_ids.size(); ++i) { + auto id = partition_ids[i]; + + rmm::device_uvector d_tmp_rows(0, handle.get_stream()); + rmm::device_uvector d_tmp_cols(0, handle.get_stream()); + std::tie(i == 0 ? d_edgelist_rows : d_tmp_rows, i == 0 ? d_edgelist_cols : d_tmp_cols) = + cugraph::experimental::generate_rmat_edgelist(handle, + scale, + partition_edge_counts[i], + a, + b, + c, + base_seed + id, + undirected ? true : false, + scramble_vertex_ids); + + rmm::device_uvector d_tmp_weights(0, handle.get_stream()); + if (test_weighted) { + if (i == 0) { + d_edgelist_weights.resize(d_edgelist_rows.size(), handle.get_stream()); + } else { + d_tmp_weights.resize(d_tmp_rows.size(), handle.get_stream()); + } + + raft::random::Rng rng(base_seed + num_partitions + id); + rng.uniform(i == 0 ? d_edgelist_weights.data() : d_tmp_weights.data(), + i == 0 ? 
d_edgelist_weights.size() : d_tmp_weights.size(), + weight_t{0.0}, + weight_t{1.0}, + handle.get_stream()); + } + + if (i > 0) { + auto start_offset = d_edgelist_rows.size(); + d_edgelist_rows.resize(start_offset + d_tmp_rows.size(), handle.get_stream()); + d_edgelist_cols.resize(d_edgelist_rows.size(), handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_tmp_rows.begin(), + d_tmp_rows.end(), + d_edgelist_rows.begin() + start_offset); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_tmp_cols.begin(), + d_tmp_cols.end(), + d_edgelist_cols.begin() + start_offset); + if (test_weighted) { + d_edgelist_weights.resize(d_edgelist_rows.size(), handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_tmp_weights.begin(), + d_tmp_weights.end(), + d_edgelist_weights.begin() + start_offset); + } + } + } + if (undirected) { // FIXME: need to symmetrize CUGRAPH_FAIL("unimplemented."); } - rmm::device_uvector d_edgelist_weights(test_weighted ? d_edgelist_rows.size() : 0, - handle.get_stream()); - if (test_weighted) { - raft::random::Rng rng(seed + 1); - rng.uniform(d_edgelist_weights.data(), - d_edgelist_weights.size(), - weight_t{0.0}, - weight_t{1.0}, - handle.get_stream()); + if (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + + rmm::device_uvector d_rx_edgelist_rows(0, handle.get_stream()); + rmm::device_uvector d_rx_edgelist_cols(0, handle.get_stream()); + rmm::device_uvector d_rx_edgelist_weights(0, handle.get_stream()); + if (test_weighted) { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(store_transposed ? d_edgelist_cols.begin() : d_edgelist_rows.begin(), + store_transposed ? d_edgelist_rows.begin() : d_edgelist_cols.begin(), + d_edgelist_weights.begin())); + + std::forward_as_tuple(std::tie(store_transposed ? d_rx_edgelist_cols : d_rx_edgelist_rows, + store_transposed ? d_rx_edgelist_rows : d_rx_edgelist_cols, + d_rx_edgelist_weights), + std::ignore) = + cugraph::experimental::groupby_gpuid_and_shuffle_values( + comm, // handle.get_comms(), + edge_first, + edge_first + d_edgelist_rows.size(), + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } else { + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(store_transposed ? d_edgelist_cols.begin() : d_edgelist_rows.begin(), + store_transposed ? d_edgelist_rows.begin() : d_edgelist_cols.begin())); + + std::forward_as_tuple(std::tie(store_transposed ? d_rx_edgelist_cols : d_rx_edgelist_rows, + store_transposed ? 
d_rx_edgelist_rows : d_rx_edgelist_cols), + std::ignore) = + cugraph::experimental::groupby_gpuid_and_shuffle_values( + comm, // handle.get_comms(), + edge_first, + edge_first + d_edgelist_rows.size(), + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } + + d_edgelist_rows = std::move(d_rx_edgelist_rows); + d_edgelist_cols = std::move(d_rx_edgelist_cols); + d_edgelist_weights = std::move(d_rx_edgelist_weights); + } + + rmm::device_uvector d_vertices(0, handle.get_stream()); + for (size_t i = 0; i < partition_ids.size(); ++i) { + auto id = partition_ids[i]; + + auto start_offset = d_vertices.size(); + d_vertices.resize(start_offset + (partition_vertex_lasts[i] - partition_vertex_firsts[i]), + handle.get_stream()); + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_vertices.begin() + start_offset, + d_vertices.end(), + partition_vertex_firsts[i]); } - rmm::device_uvector d_vertices(static_cast(size_t{1} << scale), - handle.get_stream()); - thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - d_vertices.begin(), - d_vertices.end(), - vertex_t{0}); + if (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + rmm::device_uvector d_rx_vertices(0, handle.get_stream()); + std::tie(d_rx_vertices, std::ignore) = cugraph::experimental::groupby_gpuid_and_shuffle_values( + comm, // handle.get_comms(), + d_vertices.begin(), + d_vertices.end(), + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_vertex_t{ + comm_size}] __device__(auto val) { return key_func(val); }, + handle.get_stream()); + d_vertices = std::move(d_rx_vertices); + } return generate_graph_from_edgelist( handle, @@ -90,59 +246,71 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + 
double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> @@ -157,105 +325,128 @@ generate_graph_from_rmat_params( bool undirected, bool scramble_vertex_ids, bool test_weighted, - bool renumber); + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - 
double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> @@ -270,105 +461,128 @@ generate_graph_from_rmat_params( bool undirected, bool scramble_vertex_ids, bool test_weighted, - bool renumber); + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool 
renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> @@ -383,49 +597,60 @@ generate_graph_from_rmat_params( bool undirected, bool scramble_vertex_ids, bool test_weighted, - bool renumber); + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); 
+generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); template std::tuple, rmm::device_uvector> -generate_graph_from_rmat_params(raft::handle_t const& handle, - size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool test_weighted, - bool renumber); +generate_graph_from_rmat_params( + raft::handle_t const& handle, + size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + bool test_weighted, + bool renumber, + std::vector const& partition_ids, + size_t num_partitions); } // namespace test } // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 37e87c62247..e81a76b4163 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -106,6 +106,22 @@ static const std::string& get_rapids_dataset_root_dir() return rdrd; } +template +std::tuple, + rmm::device_uvector> +generate_graph_from_edgelist(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + bool is_symmetric, + bool test_weighted, + bool renumber); + // returns a tuple of (rows, columns, weights, number_of_vertices, is_symmetric) template std::tuple, @@ -130,22 +146,6 @@ read_graph_from_matrix_market_file(raft::handle_t const& handle, bool test_weighted, bool renumber); -template -std::tuple, - rmm::device_uvector> -generate_graph_from_edgelist(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& edgelist_rows, - rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, - bool renumber); - template const& partition_ids, + size_t num_partitions); struct rmat_params_t { size_t scale{}; @@ -182,19 +184,5 @@ struct input_graph_specifier_t { rmat_params_t rmat_params{}; }; -template -std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, - vertex_t v) -{ - return (v >= 0) && (v < num_vertices); -} - -template -std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, - vertex_t v) -{ - return v < num_vertices; -} - } // namespace test } // namespace cugraph diff --git a/cpp/tests/utilities/thrust_wrapper.cu b/cpp/tests/utilities/thrust_wrapper.cu new file mode 100644 index 00000000000..5d32fb8a5d1 --- /dev/null +++ b/cpp/tests/utilities/thrust_wrapper.cu @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include + +#include +#include + +namespace cugraph { +namespace test { + +template +rmm::device_uvector sort_by_key(raft::handle_t const& handle, + vertex_t const* keys, + value_t const* values, + size_t num_pairs) +{ + rmm::device_uvector sorted_keys(num_pairs, handle.get_stream_view()); + rmm::device_uvector sorted_values(num_pairs, handle.get_stream_view()); + + thrust::copy( + rmm::exec_policy(handle.get_stream_view()), keys, keys + num_pairs, sorted_keys.begin()); + thrust::copy( + rmm::exec_policy(handle.get_stream_view()), values, values + num_pairs, sorted_values.begin()); + + thrust::sort_by_key(rmm::exec_policy(handle.get_stream_view()), + sorted_keys.begin(), + sorted_keys.end(), + sorted_values.begin()); + + return sorted_values; +} + +template rmm::device_uvector sort_by_key(raft::handle_t const& handle, + int32_t const* keys, + float const* values, + size_t num_pairs); + +template rmm::device_uvector sort_by_key(raft::handle_t const& handle, + int32_t const* keys, + double const* values, + size_t num_pairs); + +template rmm::device_uvector sort_by_key(raft::handle_t const& handle, + int32_t const* keys, + int32_t const* values, + size_t num_pairs); + +template rmm::device_uvector sort_by_key(raft::handle_t const& handle, + int64_t const* keys, + float const* values, + size_t num_pairs); + +template rmm::device_uvector sort_by_key(raft::handle_t const& handle, + int64_t const* keys, + double const* values, + size_t num_pairs); + +template rmm::device_uvector sort_by_key(raft::handle_t const& handle, + int64_t const* keys, + int64_t const* values, + size_t num_pairs); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/thrust_wrapper.hpp b/cpp/tests/utilities/thrust_wrapper.hpp new file mode 100644 index 00000000000..579dc3c550f --- /dev/null +++ b/cpp/tests/utilities/thrust_wrapper.hpp @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +namespace cugraph { +namespace test { + +template +rmm::device_uvector sort_by_key(raft::handle_t const& handle, + vertex_t const* keys, + value_t const* values, + size_t num_pairs); + +} // namespace test +} // namespace cugraph diff --git a/python/cugraph/community/egonet_wrapper.pyx b/python/cugraph/community/egonet_wrapper.pyx index ead41705628..23aa159314f 100644 --- a/python/cugraph/community/egonet_wrapper.pyx +++ b/python/cugraph/community/egonet_wrapper.pyx @@ -42,7 +42,7 @@ def egonet(input_graph, vertices, radius=1): num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) - num_partition_edges = num_edges + num_local_edges = num_edges cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] @@ -50,8 +50,10 @@ def egonet(input_graph, vertices, radius=1): if weights is not None: c_edge_weights = weights.__cuda_array_interface__['data'][0] weight_t = weights.dtype + is_weighted = True else: weight_t = np.dtype("float32") + is_weighted = False # Pointers for egonet vertices = vertices.astype('int32') @@ -72,10 +74,11 @@ def egonet(input_graph, vertices, radius=1): ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), - num_partition_edges, + num_local_edges, num_verts, num_edges, False, + is_weighted, False, False) if(weight_t==np.dtype("float32")): diff --git a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx index ccae26fe7e6..5fb9de788cf 100644 --- a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx +++ b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx @@ -52,8 +52,12 @@ def mg_katz_centrality(input_df, if "value" in input_df.columns: weights = input_df['value'] weight_t = weights.dtype + is_weighted = True + raise NotImplementedError # FIXME: c_edge_weights is always set to NULL else: + weights = None weight_t = np.dtype("float32") + is_weighted = False if alpha is None: alpha = 0.1 @@ -67,11 +71,13 @@ def mg_katz_centrality(input_df, np.dtype("double") : numberTypeEnum.doubleType} # FIXME: needs to be edge_t type not int - cdef int num_partition_edges = len(src) + cdef int num_local_edges = len(src) cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] cdef uintptr_t c_edge_weights = NULL + if weights is not None: + c_edge_weights = weights.__cuda_array_interface__['data'][0] # FIXME: data is on device, move to host (to_pandas()), convert to np array and access pointer to pass to C vertex_partition_offsets_host = vertex_partition_offsets.values_host @@ -85,9 +91,10 @@ def mg_katz_centrality(input_df, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), - num_partition_edges, + num_local_edges, num_global_verts, num_global_edges, True, + is_weighted, True, True) df = cudf.DataFrame() diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index f58630d07aa..a3cebeac272 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -56,12 +56,12 @@ def louvain(input_df, src = input_df['src'] dst = input_df['dst'] - num_partition_edges = len(src) + num_local_edges = len(src) if "value" in input_df.columns: weights = input_df['value'] else: - weights 
= cudf.Series(np.full(num_partition_edges, 1.0, dtype=np.float32)) + weights = cudf.Series(np.full(num_local_edges, 1.0, dtype=np.float32)) vertex_t = src.dtype if num_global_edges > (2**31 - 1): @@ -94,9 +94,10 @@ def louvain(input_df, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), - num_partition_edges, + num_local_edges, num_global_verts, num_global_edges, sorted_by_degree, + True, False, True) # store_transposed, multi_gpu # Create the output dataframe, column lengths must be equal to the number of diff --git a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx index 12f2342559b..c2f92f0f33b 100644 --- a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx +++ b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx @@ -51,8 +51,12 @@ def mg_pagerank(input_df, if "value" in input_df.columns: weights = input_df['value'] weight_t = weights.dtype + is_weighted = True + raise NotImplementedError # FIXME: c_edge_weights is always set to NULL else: + weights = None weight_t = np.dtype("float32") + is_weighted = False # FIXME: Offsets and indices are currently hardcoded to int, but this may # not be acceptable in the future. @@ -62,11 +66,13 @@ def mg_pagerank(input_df, np.dtype("double") : numberTypeEnum.doubleType} # FIXME: needs to be edge_t type not int - cdef int num_partition_edges = len(src) + cdef int num_local_edges = len(src) cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] cdef uintptr_t c_edge_weights = NULL + if weights is not None: + c_edge_weights = weights.__cuda_array_interface__['data'][0] # FIXME: data is on device, move to host (to_pandas()), convert to np array and access pointer to pass to C vertex_partition_offsets_host = vertex_partition_offsets.values_host @@ -81,9 +87,10 @@ def mg_pagerank(input_df, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), - num_partition_edges, + num_local_edges, num_global_verts, num_global_edges, True, + is_weighted, True, True) df = cudf.DataFrame() diff --git a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx index 527cb2bcf0a..44630ba5fb3 100644 --- a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx @@ -58,7 +58,7 @@ def mg_bfs(input_df, np.dtype("double") : numberTypeEnum.doubleType} # FIXME: needs to be edge_t type not int - cdef int num_partition_edges = len(src) + cdef int num_local_edges = len(src) cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] @@ -77,9 +77,10 @@ def mg_bfs(input_df, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), - num_partition_edges, + num_local_edges, num_global_verts, num_global_edges, True, + False, # BFS runs on unweighted graphs False, True) # Generate the cudf.DataFrame result diff --git a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx index 15d956836b4..82a4ebe04d6 100644 --- a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx @@ -46,9 +46,11 @@ def mg_sssp(input_df, if "value" in input_df.columns: weights = input_df['value'] weight_t = weights.dtype + is_weighted = True else: weights = None weight_t = np.dtype("float32") 
+ is_weighted = False # FIXME: Offsets and indices are currently hardcoded to int, but this may # not be acceptable in the future. @@ -58,7 +60,7 @@ def mg_sssp(input_df, np.dtype("double") : numberTypeEnum.doubleType} # FIXME: needs to be edge_t type not int - cdef int num_partition_edges = len(src) + cdef int num_local_edges = len(src) cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] @@ -79,9 +81,10 @@ def mg_sssp(input_df, ((numberTypeMap[vertex_t])), ((numberTypeMap[edge_t])), ((numberTypeMap[weight_t])), - num_partition_edges, + num_local_edges, num_global_verts, num_global_edges, True, + is_weighted, False, True) # Generate the cudf.DataFrame result diff --git a/python/cugraph/link_analysis/pagerank_wrapper.pyx b/python/cugraph/link_analysis/pagerank_wrapper.pyx index 81a68d42360..2c619a052ec 100644 --- a/python/cugraph/link_analysis/pagerank_wrapper.pyx +++ b/python/cugraph/link_analysis/pagerank_wrapper.pyx @@ -42,7 +42,7 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) # FIXME: needs to be edge_t type not int - cdef int num_partition_edges = len(src) + cdef int num_local_edges = len(src) df = cudf.DataFrame() df['vertex'] = cudf.Series(np.arange(num_verts, dtype=np.int32)) @@ -71,8 +71,10 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. if weights is not None: c_edge_weights = weights.__cuda_array_interface__['data'][0] weight_t = weights.dtype + is_weighted = True else: weight_t = np.dtype("float32") + is_weighted = False # FIXME: Offsets and indices are currently hardcoded to int, but this may # not be acceptable in the future. @@ -96,10 +98,10 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. 
((numberTypeEnum.int32Type)), ((numberTypeEnum.int32Type)), ((numberTypeMap[weight_t])), - #num_verts, num_edges, - num_partition_edges, + num_local_edges, num_verts, num_edges, False, + is_weighted, True, False) diff --git a/python/cugraph/structure/graph_utilities.pxd b/python/cugraph/structure/graph_utilities.pxd index 10c90f44cb8..b169e42ccf8 100644 --- a/python/cugraph/structure/graph_utilities.pxd +++ b/python/cugraph/structure/graph_utilities.pxd @@ -46,10 +46,11 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": numberTypeEnum vertexType, numberTypeEnum edgeType, numberTypeEnum weightType, - size_t num_partition_edges, + size_t num_local_edges, size_t num_global_vertices, size_t num_global_edges, bool sorted_by_degree, + bool is_weighted, bool transposed, bool multi_gpu) except + @@ -106,18 +107,21 @@ cdef extern from "experimental/graph_view.hpp" namespace "cugraph::experimental" # cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - cdef cppclass major_minor_weights_t[vertex_t, weight_t]: + cdef cppclass major_minor_weights_t[vertex_t, edge_t, weight_t]: major_minor_weights_t(const handle_t &handle) pair[unique_ptr[device_buffer], size_t] get_major_wrap() pair[unique_ptr[device_buffer], size_t] get_minor_wrap() pair[unique_ptr[device_buffer], size_t] get_weights_wrap() + unique_ptr[vector[edge_t]] get_edge_counts_wrap() ctypedef fused shuffled_vertices_t: - major_minor_weights_t[int, float] - major_minor_weights_t[int, double] - major_minor_weights_t[long, float] - major_minor_weights_t[long, double] + major_minor_weights_t[int, int, float] + major_minor_weights_t[int, int, double] + major_minor_weights_t[int, long, float] + major_minor_weights_t[int, long, double] + major_minor_weights_t[long, long, float] + major_minor_weights_t[long, long, double] # 3. return type for renumber: # @@ -151,13 +155,12 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": # cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - cdef unique_ptr[major_minor_weights_t[vertex_t, weight_t]] call_shuffle[vertex_t, edge_t, weight_t]( + cdef unique_ptr[major_minor_weights_t[vertex_t, edge_t, weight_t]] call_shuffle[vertex_t, edge_t, weight_t]( const handle_t &handle, vertex_t *edgelist_major_vertices, vertex_t *edgelist_minor_vertices, weight_t* edgelist_weights, - edge_t num_edges, - bool is_hyper_partitioned) except + + edge_t num_edges) except + # 5. `renumber_edgelist()` wrapper # @@ -167,7 +170,6 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": const handle_t &handle, vertex_t *edgelist_major_vertices, vertex_t *edgelist_minor_vertices, - edge_t num_edges, - bool is_hyper_partitioned, + const vector[edge_t]& edge_counts, bool do_check, bool multi_gpu) except + diff --git a/python/cugraph/structure/renumber_wrapper.pyx b/python/cugraph/structure/renumber_wrapper.pyx index 682c6b32a0f..99626cdee08 100644 --- a/python/cugraph/structure/renumber_wrapper.pyx +++ b/python/cugraph/structure/renumber_wrapper.pyx @@ -22,6 +22,7 @@ from libc.stdint cimport uintptr_t from cython.operator cimport dereference as deref import numpy as np +from libcpp.memory cimport make_unique from libcpp.utility cimport move from rmm._lib.device_buffer cimport device_buffer, DeviceBuffer @@ -103,13 +104,11 @@ def renumber(input_df, # maybe use cpdef ? 
raise Exception("Incompatible vertex_t and edge_t types.") # FIXME: needs to be edge_t type not int - cdef int num_partition_edges = len(major_vertices) + cdef int num_local_edges = len(major_vertices) cdef uintptr_t c_major_vertices = major_vertices.__cuda_array_interface__['data'][0] cdef uintptr_t c_minor_vertices = minor_vertices.__cuda_array_interface__['data'][0] - cdef bool is_hyper_partitioned = False # for now - cdef uintptr_t shuffled_major = NULL cdef uintptr_t shuffled_minor = NULL @@ -119,12 +118,14 @@ def renumber(input_df, # maybe use cpdef ? cdef pair[unique_ptr[device_buffer], size_t] pair_original cdef pair[unique_ptr[device_buffer], size_t] pair_partition - # tparams: vertex_t, weight_t: + # tparams: vertex_t, edge_t, weight_t: # - cdef unique_ptr[major_minor_weights_t[int, float]] ptr_shuffled_32_32 - cdef unique_ptr[major_minor_weights_t[int, double]] ptr_shuffled_32_64 - cdef unique_ptr[major_minor_weights_t[long, float]] ptr_shuffled_64_32 - cdef unique_ptr[major_minor_weights_t[long, double]] ptr_shuffled_64_64 + cdef unique_ptr[major_minor_weights_t[int, int, float]] ptr_shuffled_32_32_32 + cdef unique_ptr[major_minor_weights_t[int, int, double]] ptr_shuffled_32_32_64 + cdef unique_ptr[major_minor_weights_t[int, long, float]] ptr_shuffled_32_64_32 + cdef unique_ptr[major_minor_weights_t[int, long, double]] ptr_shuffled_32_64_64 + cdef unique_ptr[major_minor_weights_t[long, long, float]] ptr_shuffled_64_64_32 + cdef unique_ptr[major_minor_weights_t[long, long, double]] ptr_shuffled_64_64_64 # tparams: vertex_t, edge_t: # @@ -132,6 +133,11 @@ def renumber(input_df, # maybe use cpdef ? cdef unique_ptr[renum_quad_t[int, long]] ptr_renum_quad_32_64 cdef unique_ptr[renum_quad_t[long, long]] ptr_renum_quad_64_64 + # tparam: vertex_t: + # + cdef unique_ptr[vector[int]] edge_counts_32 + cdef unique_ptr[vector[long]] edge_counts_64 + # tparam: vertex_t: # cdef unique_ptr[vector[int]] uniq_partition_vector_32 @@ -143,31 +149,32 @@ def renumber(input_df, # maybe use cpdef ? if ( edge_t == np.dtype("int32")): if( weight_t == np.dtype("float32")): if(is_multi_gpu): - ptr_shuffled_32_32.reset(call_shuffle[int, int, float](deref(handle_ptr), + ptr_shuffled_32_32_32.reset(call_shuffle[int, int, float](deref(handle_ptr), c_major_vertices, c_minor_vertices, c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) - shuffled_df = renumber_helper(ptr_shuffled_32_32.get(), vertex_t, weights) + num_local_edges).release()) + shuffled_df = renumber_helper(ptr_shuffled_32_32_32.get(), vertex_t, weights) major_vertices = shuffled_df['major_vertices'] minor_vertices = shuffled_df['minor_vertices'] - num_partition_edges = len(shuffled_df) + num_local_edges = len(shuffled_df) if not transposed: major = 'src'; minor = 'dst' else: major = 'dst'; minor = 'src' shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) + edge_counts_32 = move(ptr_shuffled_32_32_32.get().get_edge_counts_wrap()) else: shuffled_df = input_df - + edge_counts_32 = make_unique[vector[int]](1, num_local_edges) + shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] + ptr_renum_quad_32_32.reset(call_renumber[int, int](deref(handle_ptr), shuffled_major, shuffled_minor, - num_partition_edges, - is_hyper_partitioned, + deref(edge_counts_32.get()), 1, mg_flag).release()) @@ -190,8 +197,7 @@ def renumber(input_df, # maybe use cpdef ? 
uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), - uniq_partition_vector_32.get()[0].at(1)), + new_series = cudf.Series(np.arange(0, ptr_renum_quad_32_32.get().get_num_vertices()), dtype=vertex_t) # create new cudf df # @@ -205,24 +211,25 @@ def renumber(input_df, # maybe use cpdef ? elif( weight_t == np.dtype("float64")): if(is_multi_gpu): - ptr_shuffled_32_64.reset(call_shuffle[int, int, double](deref(handle_ptr), + ptr_shuffled_32_32_64.reset(call_shuffle[int, int, double](deref(handle_ptr), c_major_vertices, c_minor_vertices, c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) + num_local_edges).release()) - shuffled_df = renumber_helper(ptr_shuffled_32_64.get(), vertex_t, weights) + shuffled_df = renumber_helper(ptr_shuffled_32_32_64.get(), vertex_t, weights) major_vertices = shuffled_df['major_vertices'] minor_vertices = shuffled_df['minor_vertices'] - num_partition_edges = len(shuffled_df) + num_local_edges = len(shuffled_df) if not transposed: major = 'src'; minor = 'dst' else: major = 'dst'; minor = 'src' shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) + edge_counts_32 = move(ptr_shuffled_32_32_64.get().get_edge_counts_wrap()) else: shuffled_df = input_df + edge_counts_32 = make_unique[vector[int]](1, num_local_edges) shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] @@ -230,8 +237,7 @@ def renumber(input_df, # maybe use cpdef ? ptr_renum_quad_32_32.reset(call_renumber[int, int](deref(handle_ptr), shuffled_major, shuffled_minor, - num_partition_edges, - is_hyper_partitioned, + deref(edge_counts_32.get()), do_check, mg_flag).release()) @@ -254,8 +260,7 @@ def renumber(input_df, # maybe use cpdef ? uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), - uniq_partition_vector_32.get()[0].at(1)), + new_series = cudf.Series(np.arange(0, ptr_renum_quad_32_32.get().get_num_vertices()), dtype=vertex_t) # create new cudf df @@ -271,24 +276,25 @@ def renumber(input_df, # maybe use cpdef ? 
elif ( edge_t == np.dtype("int64")): if( weight_t == np.dtype("float32")): if(is_multi_gpu): - ptr_shuffled_32_32.reset(call_shuffle[int, long, float](deref(handle_ptr), + ptr_shuffled_32_64_32.reset(call_shuffle[int, long, float](deref(handle_ptr), c_major_vertices, c_minor_vertices, c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) + num_local_edges).release()) - shuffled_df = renumber_helper(ptr_shuffled_32_32.get(), vertex_t, weights) + shuffled_df = renumber_helper(ptr_shuffled_32_64_32.get(), vertex_t, weights) major_vertices = shuffled_df['major_vertices'] minor_vertices = shuffled_df['minor_vertices'] - num_partition_edges = len(shuffled_df) + num_local_edges = len(shuffled_df) if not transposed: major = 'src'; minor = 'dst' else: major = 'dst'; minor = 'src' shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) + edge_counts_64 = move(ptr_shuffled_32_64_32.get().get_edge_counts_wrap()) else: shuffled_df = input_df + edge_counts_64 = make_unique[vector[long]](1, num_local_edges) shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] @@ -296,8 +302,7 @@ def renumber(input_df, # maybe use cpdef ? ptr_renum_quad_32_64.reset(call_renumber[int, long](deref(handle_ptr), shuffled_major, shuffled_minor, - num_partition_edges, - is_hyper_partitioned, + deref(edge_counts_64.get()), do_check, mg_flag).release()) @@ -320,8 +325,7 @@ def renumber(input_df, # maybe use cpdef ? uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), - uniq_partition_vector_32.get()[0].at(1)), + new_series = cudf.Series(np.arange(0, ptr_renum_quad_32_64.get().get_num_vertices()), dtype=vertex_t) # create new cudf df @@ -335,24 +339,25 @@ def renumber(input_df, # maybe use cpdef ? return renumbered_map, shuffled_df elif( weight_t == np.dtype("float64")): if(is_multi_gpu): - ptr_shuffled_32_64.reset(call_shuffle[int, long, double](deref(handle_ptr), + ptr_shuffled_32_64_64.reset(call_shuffle[int, long, double](deref(handle_ptr), c_major_vertices, c_minor_vertices, c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) + num_local_edges).release()) - shuffled_df = renumber_helper(ptr_shuffled_32_64.get(), vertex_t, weights) + shuffled_df = renumber_helper(ptr_shuffled_32_64_64.get(), vertex_t, weights) major_vertices = shuffled_df['major_vertices'] minor_vertices = shuffled_df['minor_vertices'] - num_partition_edges = len(shuffled_df) + num_local_edges = len(shuffled_df) if not transposed: major = 'src'; minor = 'dst' else: major = 'dst'; minor = 'src' shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) + edge_counts_64 = move(ptr_shuffled_32_64_64.get().get_edge_counts_wrap()) else: shuffled_df = input_df + edge_counts_64 = make_unique[vector[long]](1, num_local_edges) shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] @@ -360,8 +365,7 @@ def renumber(input_df, # maybe use cpdef ? ptr_renum_quad_32_64.reset(call_renumber[int, long](deref(handle_ptr), shuffled_major, shuffled_minor, - num_partition_edges, - is_hyper_partitioned, + deref(edge_counts_64.get()), do_check, mg_flag).release()) @@ -384,8 +388,7 @@ def renumber(input_df, # maybe use cpdef ? 
uniq_partition_vector_32.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0), - uniq_partition_vector_32.get()[0].at(1)), + new_series = cudf.Series(np.arange(0, ptr_renum_quad_32_64.get().get_num_vertices()), dtype=vertex_t) # create new cudf df # @@ -401,24 +404,25 @@ def renumber(input_df, # maybe use cpdef ? if ( edge_t == np.dtype("int64")): if( weight_t == np.dtype("float32")): if(is_multi_gpu): - ptr_shuffled_64_32.reset(call_shuffle[long, long, float](deref(handle_ptr), + ptr_shuffled_64_64_32.reset(call_shuffle[long, long, float](deref(handle_ptr), c_major_vertices, c_minor_vertices, c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) + num_local_edges).release()) - shuffled_df = renumber_helper(ptr_shuffled_64_32.get(), vertex_t, weights) + shuffled_df = renumber_helper(ptr_shuffled_64_64_32.get(), vertex_t, weights) major_vertices = shuffled_df['major_vertices'] minor_vertices = shuffled_df['minor_vertices'] - num_partition_edges = len(shuffled_df) + num_local_edges = len(shuffled_df) if not transposed: major = 'src'; minor = 'dst' else: major = 'dst'; minor = 'src' shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) + edge_counts_64 = move(ptr_shuffled_64_64_32.get().get_edge_counts_wrap()) else: shuffled_df = input_df + edge_counts_64 = make_unique[vector[long]](1, num_local_edges) shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] @@ -426,8 +430,7 @@ def renumber(input_df, # maybe use cpdef ? ptr_renum_quad_64_64.reset(call_renumber[long, long](deref(handle_ptr), shuffled_major, shuffled_minor, - num_partition_edges, - is_hyper_partitioned, + deref(edge_counts_64.get()), do_check, mg_flag).release()) @@ -450,8 +453,7 @@ def renumber(input_df, # maybe use cpdef ? uniq_partition_vector_64.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(uniq_partition_vector_64.get()[0].at(0), - uniq_partition_vector_64.get()[0].at(1)), + new_series = cudf.Series(np.arange(0, ptr_renum_quad_64_64.get().get_num_vertices()), dtype=vertex_t) # create new cudf df @@ -466,24 +468,25 @@ def renumber(input_df, # maybe use cpdef ? 
elif( weight_t == np.dtype("float64")): if(is_multi_gpu): - ptr_shuffled_64_64.reset(call_shuffle[long, long, double](deref(handle_ptr), + ptr_shuffled_64_64_64.reset(call_shuffle[long, long, double](deref(handle_ptr), c_major_vertices, c_minor_vertices, c_edge_weights, - num_partition_edges, - is_hyper_partitioned).release()) + num_local_edges).release()) - shuffled_df = renumber_helper(ptr_shuffled_64_64.get(), vertex_t, weights) + shuffled_df = renumber_helper(ptr_shuffled_64_64_64.get(), vertex_t, weights) major_vertices = shuffled_df['major_vertices'] minor_vertices = shuffled_df['minor_vertices'] - num_partition_edges = len(shuffled_df) + num_local_edges = len(shuffled_df) if not transposed: major = 'src'; minor = 'dst' else: major = 'dst'; minor = 'src' shuffled_df = shuffled_df.rename(columns={'major_vertices':major, 'minor_vertices':minor}, copy=False) + edge_counts_64 = move(ptr_shuffled_64_64_64.get().get_edge_counts_wrap()) else: shuffled_df = input_df + edge_counts_64 = make_unique[vector[long]](1, num_local_edges) shuffled_major = major_vertices.__cuda_array_interface__['data'][0] shuffled_minor = minor_vertices.__cuda_array_interface__['data'][0] @@ -491,8 +494,7 @@ def renumber(input_df, # maybe use cpdef ? ptr_renum_quad_64_64.reset(call_renumber[long, long](deref(handle_ptr), shuffled_major, shuffled_minor, - num_partition_edges, - is_hyper_partitioned, + deref(edge_counts_64.get()), do_check, mg_flag).release()) @@ -515,8 +517,7 @@ def renumber(input_df, # maybe use cpdef ? uniq_partition_vector_64.get()[0].at(rank_indx+1)), dtype=vertex_t) else: - new_series = cudf.Series(np.arange(uniq_partition_vector_64.get()[0].at(0), - uniq_partition_vector_64.get()[0].at(1)), + new_series = cudf.Series(np.arange(0, ptr_renum_quad_64_64.get().get_num_vertices()), dtype=vertex_t) # create new cudf df From b442f3be635b11781ebfad0cc44684554dd0c315 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Tue, 6 Apr 2021 10:46:26 -0500 Subject: [PATCH 45/51] Updated NetworkX version to 2.5.1 (#1510) Updated NetworkX version to latest version, which addresses an incompatibility with the latest `decorator` dependency. 
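As a minimal sketch (not part of this change), the version pin below corresponds to the following runtime requirement, assuming NetworkX reports a plain dotted release string such as "2.5.1":

    import networkx as nx

    # Assumption for this sketch: nx.__version__ is a plain dotted release string.
    installed = tuple(int(part) for part in nx.__version__.split(".")[:3])
    if installed < (2, 5, 1):
        raise RuntimeError(
            f"NetworkX {nx.__version__} is too old; >=2.5.1 is required to avoid the "
            "incompatibility with recent 'decorator' releases."
        )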
Tested by running the BC tests which were previously failing with Nx 2.5.

Authors:
  - Rick Ratzel (https://github.com/rlratzel)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)
  - Brad Rees (https://github.com/BradReesWork)
  - Jordan Jacobelli (https://github.com/Ethyling)
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/cugraph/pull/1510
---
 conda/environments/cugraph_dev_cuda10.1.yml | 2 +-
 conda/environments/cugraph_dev_cuda10.2.yml | 2 +-
 conda/environments/cugraph_dev_cuda11.0.yml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml
index f26c3dd45d9..a138f5e80df 100644
--- a/conda/environments/cugraph_dev_cuda10.1.yml
+++ b/conda/environments/cugraph_dev_cuda10.1.yml
@@ -18,7 +18,7 @@ dependencies:
 - ucx-py=0.19*
 - ucx-proc=*=gpu
 - scipy
-- networkx
+- networkx>=2.5.1
 - python-louvain
 - cudatoolkit=10.1
 - clang=8.0.1
diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml
index 2848cc49dc7..d53fefc086a 100644
--- a/conda/environments/cugraph_dev_cuda10.2.yml
+++ b/conda/environments/cugraph_dev_cuda10.2.yml
@@ -18,7 +18,7 @@ dependencies:
 - ucx-py=0.19*
 - ucx-proc=*=gpu
 - scipy
-- networkx
+- networkx>=2.5.1
 - python-louvain
 - cudatoolkit=10.2
 - clang=8.0.1
diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml
index 82e8b409d13..771b175aa92 100644
--- a/conda/environments/cugraph_dev_cuda11.0.yml
+++ b/conda/environments/cugraph_dev_cuda11.0.yml
@@ -18,7 +18,7 @@ dependencies:
 - ucx-py=0.19*
 - ucx-proc=*=gpu
 - scipy
-- networkx
+- networkx>=2.5.1
 - python-louvain
 - cudatoolkit=11.0
 - clang=8.0.1

From 1b34e264cab785db88dab2ea0dea7349ea326674 Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com>
Date: Wed, 7 Apr 2021 09:04:48 -0400
Subject: [PATCH 46/51] Improve graph primitives performance on graphs with widely varying vertex degrees (#1447)

Partially addresses Issue #1442.

Update the graph primitives used by PageRank, Katz Centrality, BFS, and SSSP to launch 3 different kernels based on vertex degrees to address the thread divergence issue. In addition, cut the memory footprint of the VertexFrontier class used by BFS & SSSP.

The following highlights the performance improvement with this optimization.

R-mat 2^25 vertices, 2^25 * 32 edges
PageRank: 7.66, 7.42, 8.83, 8.83 seconds (the first two unweighted, the last two weighted, first & third without personalization) => 1.07, 1.08, 1.36, 1.39 seconds
Katz: 1.08, 1.94 seconds (unweighted, weighted) => 0.243, 0.275
BFS: 1.32 seconds => 0.251

R-mat 2^25 vertices, 2^25 * 16 edges
SSSP: 1.89 seconds (memory allocation fails with the edge factor of 32) => 0.317

SSSP now also works with 2^25 vertices and 2^25 * 32 edges thanks to the memory footprint improvement, taking 0.514 sec.

Additional optimizations are still needed to reach the target performance:
1. Add BFS & SSSP specific optimizations (the current implementation assumes general reduction operations, while BFS can pick any source vertex if a vertex is discovered by multiple source vertices and SSSP picks the one with the minimum edge weight; these pure-function reduction operations allow additional optimizations).
2. 
Launch 3 different kernels in multiple streams to recover parallelism when the frontier size is relatively small (currently three kernels are queued in a single stream, and this leads to up to 3x decrease in parallelism) Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Alex Fender (https://github.com/afender) - Chuck Hastings (https://github.com/ChuckHastings) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1447 --- cpp/include/experimental/graph.hpp | 11 +- cpp/include/experimental/graph_functions.hpp | 2 + cpp/include/experimental/graph_view.hpp | 27 +- .../copy_v_transform_reduce_in_out_nbr.cuh | 256 ++++---- ...ransform_reduce_key_aggregated_out_nbr.cuh | 2 +- cpp/include/patterns/count_if_e.cuh | 179 +----- cpp/include/patterns/edge_op_utils.cuh | 38 +- ...orm_reduce_by_adj_matrix_row_col_key_e.cuh | 2 +- cpp/include/patterns/transform_reduce_e.cuh | 260 ++++++-- .../update_frontier_v_push_if_out_nbr.cuh | 603 +++++++++++------- cpp/include/patterns/vertex_frontier.cuh | 344 +++++----- cpp/include/utilities/dataframe_buffer.cuh | 36 ++ cpp/src/experimental/bfs.cu | 34 +- cpp/src/experimental/graph.cu | 20 +- cpp/src/experimental/graph_view.cu | 28 +- cpp/src/experimental/sssp.cu | 34 +- cpp/tests/experimental/bfs_test.cpp | 24 +- .../experimental/katz_centrality_test.cpp | 24 +- cpp/tests/experimental/mg_bfs_test.cpp | 35 +- .../experimental/mg_katz_centrality_test.cpp | 35 +- cpp/tests/experimental/mg_sssp_test.cpp | 35 +- cpp/tests/experimental/pagerank_test.cpp | 24 +- cpp/tests/experimental/sssp_test.cpp | 26 +- cpp/tests/pagerank/mg_pagerank_test.cpp | 29 +- .../utilities/generate_graph_from_edgelist.cu | 13 +- 25 files changed, 1276 insertions(+), 845 deletions(-) diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/experimental/graph.hpp index a380200ea1f..27f766b8593 100644 --- a/cpp/include/experimental/graph.hpp +++ b/cpp/include/experimental/graph.hpp @@ -88,12 +88,12 @@ class graph_tget_number_of_vertices(), this->get_number_of_edges(), this->get_graph_properties(), - vertex_partition_segment_offsets_.size() > 0, + adj_matrix_partition_segment_offsets_.size() > 0, false); } @@ -105,9 +105,10 @@ class graph_t partition_{}; std::vector - vertex_partition_segment_offsets_{}; // segment offsets within the vertex partition based on - // vertex degree, relevant only if - // sorted_by_global_degree_within_vertex_partition is true + adj_matrix_partition_segment_offsets_{}; // segment offsets within the vertex partition based + // on vertex degree, relevant only if + // sorted_by_global_degree_within_vertex_partition is + // true }; // single-GPU version diff --git a/cpp/include/experimental/graph_functions.hpp b/cpp/include/experimental/graph_functions.hpp index 100742adccd..b48dc6da136 100644 --- a/cpp/include/experimental/graph_functions.hpp +++ b/cpp/include/experimental/graph_functions.hpp @@ -251,6 +251,8 @@ void unrenumber_local_int_vertices( vertex_t local_int_vertex_last, bool do_expensive_check = false); +// FIXME: We may add unrenumber_int_rows(or cols) as this will require communication only within a +// sub-communicator and potentially be more efficient. /** * @brief Unrenumber (possibly non-local) internal vertices to external vertices based on the * providied @p renumber_map_labels. 
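To make the degree-based segmentation described in this commit concrete, here is a minimal sketch (not code from this PR) of how the per-partition segment offsets consumed by the high-, mid-, and low-degree kernels below can be derived, assuming out-degrees are already sorted in decreasing order within the vertex partition and using illustrative block/warp thresholds:

    # per-vertex out-degrees, sorted in decreasing order (assumed input)
    degrees = [5000, 1200, 700, 240, 130, 40, 12, 3, 1, 0]
    block_size, warp_size = 512, 32  # illustrative thresholds

    num_high = sum(d >= block_size for d in degrees)             # one thread block per vertex
    num_mid = sum(warp_size <= d < block_size for d in degrees)  # one warp per vertex
    segment_offsets = [0, num_high, num_high + num_mid, len(degrees)]
    print(segment_offsets)  # [0, 3, 6, 10]; the last segment is processed one thread per vertex

Each [segment_offsets[i], segment_offsets[i+1]) range is then handed to a differently parallelized kernel, which avoids the thread divergence a single thread-per-vertex kernel suffers on skewed degree distributions.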
diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index 47c93b42ca9..e9593b70ddb 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -301,7 +301,7 @@ class graph_view_t const& adj_matrix_partition_offsets, std::vector const& adj_matrix_partition_indices, std::vector const& adj_matrix_partition_weights, - std::vector const& vertex_partition_segment_offsets, + std::vector const& adj_matrix_partition_segment_offsets, partition_t const& partition, vertex_t number_of_vertices, edge_t number_of_edges, @@ -431,6 +431,17 @@ class graph_view_t get_local_adj_matrix_partition_segment_offsets(size_t partition_idx) const + { + return adj_matrix_partition_segment_offsets_.size() > 0 + ? std::vector( + adj_matrix_partition_segment_offsets_.begin() + + partition_idx * (detail::num_segments_per_vertex_partition + 1), + adj_matrix_partition_segment_offsets_.begin() + + (partition_idx + 1) * (detail::num_segments_per_vertex_partition + 1)) + : std::vector{}; + } + // FIXME: this function is not part of the public stable API. This function is mainly for pattern // accelerator implementation. This function is currently public to support the legacy // implementations directly accessing CSR/CSC data, but this function will eventually become @@ -499,9 +510,10 @@ class graph_view_t partition_{}; std::vector - vertex_partition_segment_offsets_{}; // segment offsets within the vertex partition based on - // vertex degree, relevant only if - // sorted_by_global_degree_within_vertex_partition is true + adj_matrix_partition_segment_offsets_{}; // segment offsets within the vertex partition based + // on vertex degree, relevant only if + // sorted_by_global_degree_within_vertex_partition is + // true }; // single-GPU version @@ -612,6 +624,13 @@ class graph_view_t get_local_adj_matrix_partition_segment_offsets( + size_t adj_matrix_partition_idx) const + { + assert(adj_matrix_partition_idx == 0); + return segment_offsets_.size() > 0 ? segment_offsets_ : std::vector{}; + } + // FIXME: this function is not part of the public stable API.This function is mainly for pattern // accelerator implementation. 
This function is currently public to support the legacy // implementations directly accessing CSR/CSC data, but this function will eventually become diff --git a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh index e6a73a874ae..6d828dab513 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh @@ -42,23 +42,7 @@ namespace experimental { namespace detail { -// FIXME: block size requires tuning -int32_t constexpr copy_v_transform_reduce_nbr_for_all_block_size = 128; - -#if 0 -// FIXME: delete this once we verify that the thrust replace in for_all_major_for_all_nbr_low_degree is no slower than the original for loop based imoplementation -template -__device__ std::enable_if_t accumulate_edge_op_result(T& lhs, T const& rhs) -{ - lhs = plus_edge_op_result(lhs, rhs); -} - -template -__device__ std::enable_if_t accumulate_edge_op_result(T& lhs, T const& rhs) -{ - atomic_add(&lhs, rhs); -} -#endif +int32_t constexpr copy_v_transform_reduce_nbr_for_all_block_size = 512; template (tid); while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_degree{}; - auto major_offset = major_start_offset + idx; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(static_cast(major_offset)); -#if 1 auto transform_op = [&matrix_partition, &adj_matrix_row_value_input_first, &adj_matrix_col_value_input_first, @@ -148,44 +131,6 @@ __global__ void for_all_major_for_all_nbr_low_degree( atomic_accumulate_edge_op_result(result_value_output_first + minor_offset, e_op_result); }); } -#else - // FIXME: delete this once we verify that the code above is not slower than this. - e_op_result_t e_op_result_sum{init}; // relevent only if update_major == true - for (edge_t i = 0; i < local_degree; ++i) { - auto minor = indices[i]; - auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; - auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); - auto row = GraphViewType::is_adj_matrix_transposed - ? minor - : matrix_partition.get_major_from_major_offset_nocheck(major_offset); - auto col = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_from_major_offset_nocheck(major_offset) - : minor; - auto row_offset = GraphViewType::is_adj_matrix_transposed - ? minor_offset - : static_cast(major_offset); - auto col_offset = GraphViewType::is_adj_matrix_transposed - ? 
static_cast(major_offset) - : minor_offset; - auto e_op_result = evaluate_edge_op() - .compute(row, - col, - weight, - *(adj_matrix_row_value_input_first + row_offset), - *(adj_matrix_col_value_input_first + col_offset), - e_op); - if (update_major) { - accumulate_edge_op_result(e_op_result_sum, e_op_result); - } else { - accumulate_edge_op_result(*(result_value_output_first + minor_offset), - e_op_result); - } - } - if (update_major) { *(result_value_output_first + idx) = e_op_result_sum; } -#endif idx += gridDim.x * blockDim.x; } } @@ -219,14 +164,14 @@ __global__ void for_all_major_for_all_nbr_mid_degree( auto idx = static_cast(tid / raft::warp_size()); while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_degree{}; - auto major_offset = major_start_offset + idx; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); auto e_op_result_sum = lane_id == 0 ? init : e_op_result_t{}; // relevent only if update_major == true - for (edge_t i = lane_id; i < local_degree; i += raft::warp_size) { + for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { auto minor = indices[i]; auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); @@ -293,10 +238,10 @@ __global__ void for_all_major_for_all_nbr_high_degree( auto idx = static_cast(blockIdx.x); while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_degree{}; - auto major_offset = major_start_offset + idx; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); auto e_op_result_sum = threadIdx.x == 0 ? init : e_op_result_t{}; // relevent only if update_major == true @@ -358,7 +303,8 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, T init, VertexValueOutputIterator vertex_value_output_first) { - using vertex_t = typename GraphViewType::vertex_type; + constexpr auto update_major = (in == GraphViewType::is_adj_matrix_transposed); + using vertex_t = typename GraphViewType::vertex_type; static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); @@ -398,15 +344,13 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, matrix_partition_device_t matrix_partition(graph_view, i); auto major_tmp_buffer_size = - GraphViewType::is_multi_gpu && (in == GraphViewType::is_adj_matrix_transposed) - ? matrix_partition.get_major_size() - : vertex_t{0}; + GraphViewType::is_multi_gpu && update_major ? 
matrix_partition.get_major_size() : vertex_t{0}; auto major_tmp_buffer = allocate_dataframe_buffer(major_tmp_buffer_size, handle.get_stream()); auto major_buffer_first = get_dataframe_buffer_begin(major_tmp_buffer); auto major_init = T{}; - if (in == GraphViewType::is_adj_matrix_transposed) { + if (update_major) { if (GraphViewType::is_multi_gpu) { auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); @@ -416,60 +360,142 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, } } - int comm_root_rank = 0; - if (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - comm_root_rank = i * row_comm_size + row_comm_rank; - } - - if (graph_view.get_vertex_partition_size(comm_root_rank) > 0) { - raft::grid_1d_thread_t update_grid(graph_view.get_vertex_partition_size(comm_root_rank), + auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? vertex_t{0} + : matrix_partition.get_major_value_start_offset(); + auto col_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_value_start_offset() + : vertex_t{0}; + auto segment_offsets = graph_view.get_local_adj_matrix_partition_segment_offsets(i); + if (segment_offsets.size() > 0) { + // FIXME: we may further improve performance by 1) concurrently running kernels on different + // segments; 2) individually tuning block sizes for different segments; and 3) adding one more + // segment for very high degree vertices and running segmented reduction + static_assert(detail::num_segments_per_vertex_partition == 3); + if (segment_offsets[1] > 0) { + raft::grid_1d_block_t update_grid(segment_offsets[1], + detail::copy_v_transform_reduce_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + // FIXME: with C++17 we can collapse the if-else statement below with a functor with "if + // constexpr" that returns either a multi-GPU output buffer or a single-GPU output buffer. + if (GraphViewType::is_multi_gpu) { + detail::for_all_major_for_all_nbr_high_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_first() + segment_offsets[1], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + update_major ? 
major_buffer_first : minor_buffer_first, + e_op, + major_init); + } else { + detail::for_all_major_for_all_nbr_high_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_first() + segment_offsets[1], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + vertex_value_output_first, + e_op, + major_init); + } + } + if (segment_offsets[2] - segment_offsets[1] > 0) { + raft::grid_1d_warp_t update_grid(segment_offsets[2] - segment_offsets[1], detail::copy_v_transform_reduce_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - - if (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - - auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed - ? vertex_t{0} - : matrix_partition.get_major_value_start_offset(); - auto col_value_input_offset = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_value_start_offset() - : vertex_t{0}; - - detail::for_all_major_for_all_nbr_low_degree - <<>>( - matrix_partition, - graph_view.get_vertex_partition_first(comm_root_rank), - graph_view.get_vertex_partition_last(comm_root_rank), - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - (in == GraphViewType::is_adj_matrix_transposed) ? major_buffer_first - : minor_buffer_first, - e_op, - major_init); - } else { - detail::for_all_major_for_all_nbr_low_degree - <<>>( - matrix_partition, - graph_view.get_vertex_partition_first(comm_root_rank), - graph_view.get_vertex_partition_last(comm_root_rank), - adj_matrix_row_value_input_first, - adj_matrix_col_value_input_first, - vertex_value_output_first, - e_op, - major_init); + // FIXME: with C++17 we can collapse the if-else statement below with a functor with "if + // constexpr" that returns either a multi-GPU output buffer or a single-GPU output buffer. + if (GraphViewType::is_multi_gpu) { + detail::for_all_major_for_all_nbr_mid_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first() + segment_offsets[1], + matrix_partition.get_major_first() + segment_offsets[2], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + update_major ? major_buffer_first + segment_offsets[1] : minor_buffer_first, + e_op, + major_init); + } else { + detail::for_all_major_for_all_nbr_mid_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first() + segment_offsets[1], + matrix_partition.get_major_first() + segment_offsets[2], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + vertex_value_output_first + (update_major ? segment_offsets[1] : vertex_t{0}), + e_op, + major_init); + } + } + if (segment_offsets[3] - segment_offsets[2] > 0) { + raft::grid_1d_thread_t update_grid(segment_offsets[3] - segment_offsets[2], + detail::copy_v_transform_reduce_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + // FIXME: with C++17 we can collapse the if-else statement below with a functor with "if + // constexpr" that returns either a multi-GPU output buffer or a single-GPU output buffer. 
+ if (GraphViewType::is_multi_gpu) { + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first() + segment_offsets[2], + matrix_partition.get_major_last(), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + update_major ? major_buffer_first + segment_offsets[2] : minor_buffer_first, + e_op, + major_init); + } else { + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first() + segment_offsets[2], + matrix_partition.get_major_last(), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + vertex_value_output_first + (update_major ? segment_offsets[2] : vertex_t{0}), + e_op, + major_init); + } + } + } else { + if (matrix_partition.get_major_size() > 0) { + raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), + detail::copy_v_transform_reduce_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + // FIXME: with C++17 we can collapse the if-else statement below with a functor with "if + // constexpr" that returns either a multi-GPU output buffer or a single-GPU output buffer. + if (GraphViewType::is_multi_gpu) { + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_last(), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + update_major ? major_buffer_first : minor_buffer_first, + e_op, + major_init); + } else { + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_last(), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + vertex_value_output_first, + e_op, + major_init); + } } } - if (GraphViewType::is_multi_gpu && (in == GraphViewType::is_adj_matrix_transposed)) { + if (GraphViewType::is_multi_gpu && update_major) { auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_rank = row_comm.get_rank(); auto const row_comm_size = row_comm.get_size(); @@ -487,7 +513,7 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, } } - if (GraphViewType::is_multi_gpu && (in != GraphViewType::is_adj_matrix_transposed)) { + if (GraphViewType::is_multi_gpu && !update_major) { auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 22dc2041793..f904c35ef9e 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -60,10 +60,10 @@ __global__ void for_all_major_for_all_nbr_low_degree( auto idx = static_cast(tid); while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_degree{}; - auto major_offset = major_start_offset + idx; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(static_cast(major_offset)); if (local_degree > 0) { diff --git 
a/cpp/include/patterns/count_if_e.cuh b/cpp/include/patterns/count_if_e.cuh index 99bfc80f643..4eb3fea24c4 100644 --- a/cpp/include/patterns/count_if_e.cuh +++ b/cpp/include/patterns/count_if_e.cuh @@ -16,132 +16,16 @@ #pragma once #include -#include #include -#include -#include +#include -#include -#include #include -#include -#include - #include -#include namespace cugraph { namespace experimental { -namespace detail { - -// FIXME: block size requires tuning -int32_t constexpr count_if_e_for_all_block_size = 128; - -// FIXME: function names conflict if included with transform_reduce_e.cuh -template -__global__ void for_all_major_for_all_nbr_low_degree( - matrix_partition_device_t matrix_partition, - AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, - AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, - typename GraphViewType::edge_type* block_counts, - EdgeOp e_op) -{ - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using weight_t = typename GraphViewType::weight_type; - - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - auto idx = static_cast(tid); - - edge_t count{0}; - while (idx < static_cast(matrix_partition.get_major_size())) { - vertex_t const* indices{nullptr}; - weight_t const* weights{nullptr}; - edge_t local_degree{}; - thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(idx); -#if 1 - count += thrust::count_if( - thrust::seq, - thrust::make_counting_iterator(edge_t{0}), - thrust::make_counting_iterator(local_degree), - [&matrix_partition, - &adj_matrix_row_value_input_first, - &adj_matrix_col_value_input_first, - &e_op, - idx, - indices, - weights] __device__(auto i) { - auto minor = indices[i]; - auto weight = weights != nullptr ? weights[i] : 1.0; - auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); - auto row = GraphViewType::is_adj_matrix_transposed - ? minor - : matrix_partition.get_major_from_major_offset_nocheck(idx); - auto col = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_from_major_offset_nocheck(idx) - : minor; - auto row_offset = - GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); - auto col_offset = - GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; - auto e_op_result = evaluate_edge_op() - .compute(row, - col, - weight, - *(adj_matrix_row_value_input_first + row_offset), - *(adj_matrix_col_value_input_first + col_offset), - e_op); - - return e_op_result; - }); -#else - // FIXME: delete this once we verify that the code above is not slower than this. - for (vertex_t i = 0; i < local_degree; ++i) { - auto minor = indices[i]; - auto weight = weights != nullptr ? weights[i] : 1.0; - auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); - auto row = GraphViewType::is_adj_matrix_transposed - ? minor - : matrix_partition.get_major_from_major_offset_nocheck(idx); - auto col = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_from_major_offset_nocheck(idx) - : minor; - auto row_offset = - GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); - auto col_offset = - GraphViewType::is_adj_matrix_transposed ? 
static_cast(idx) : minor_offset; - auto e_op_result = evaluate_edge_op() - .compute(row, - col, - weight, - *(adj_matrix_row_value_input_first + row_offset), - *(adj_matrix_col_value_input_first + col_offset), - e_op); - if (e_op_result) { count++; } - } -#endif - idx += gridDim.x * blockDim.x; - } - - using BlockReduce = cub::BlockReduce; - __shared__ typename BlockReduce::TempStorage temp_storage; - count = BlockReduce(temp_storage).Sum(count); - if (threadIdx.x == 0) { *(block_counts + blockIdx.x) = count; } -} - -} // namespace detail - /** * @brief Count the number of edges that satisfies the given predicate. * @@ -182,55 +66,18 @@ typename GraphViewType::edge_type count_if_e( AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, EdgeOp e_op) { - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - - edge_t count{0}; - for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { - matrix_partition_device_t matrix_partition(graph_view, i); - - if (matrix_partition.get_major_size() > 0) { - auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed - ? vertex_t{0} - : matrix_partition.get_major_value_start_offset(); - auto col_value_input_offset = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_value_start_offset() - : vertex_t{0}; - - raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), - detail::count_if_e_for_all_block_size, - handle.get_device_properties().maxGridSize[0]); - - rmm::device_uvector block_counts(update_grid.num_blocks, handle.get_stream()); - - detail::for_all_major_for_all_nbr_low_degree<<>>( - matrix_partition, - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - block_counts.data(), - e_op); - - // FIXME: we have several options to implement this. With cooperative group support - // (https://devblogs.nvidia.com/cooperative-groups/), we can run this synchronization within - // the previous kernel. Using atomics at the end of the previous kernel is another option - // (sequentialization due to atomics may not be bad as different blocks may reach the - // synchronization point in varying timings and the number of SMs is not very big) - count += thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - block_counts.begin(), - block_counts.end(), - edge_t{0}, - thrust::plus()); - } - } - - if (GraphViewType::is_multi_gpu) { - count = host_scalar_allreduce(handle.get_comms(), count, handle.get_stream()); - } - - return count; + using edge_t = typename GraphViewType::edge_type; + + return transform_reduce_e(handle, + graph_view, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + cast_edge_op_bool_to_integer{e_op}, + edge_t{0}); } } // namespace experimental diff --git a/cpp/include/patterns/edge_op_utils.cuh b/cpp/include/patterns/edge_op_utils.cuh index 58fb31c7605..198c1880ff4 100644 --- a/cpp/include/patterns/edge_op_utils.cuh +++ b/cpp/include/patterns/edge_op_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -77,6 +77,42 @@ struct evaluate_edge_op { } }; +template +struct cast_edge_op_bool_to_integer { + static_assert(std::is_integral::value); + using vertex_type = typename GraphViewType::vertex_type; + using weight_type = typename GraphViewType::weight_type; + using row_value_type = typename std::iterator_traits::value_type; + using col_value_type = typename std::iterator_traits::value_type; + + EdgeOp e_op{}; + + template + __device__ std::enable_if_t>::valid, T> + operator()(V r, V c, W w, R rv, C cv) + { + return e_op(r, c, w, rv, cv) ? T{1} : T{0}; + } + + template + __device__ std::enable_if_t>::valid, T> + operator()(V r, V c, R rv, C cv) + { + return e_op(r, c, rv, cv) ? T{1} : T{0}; + } +}; + template __host__ __device__ std::enable_if_t::value, T> plus_edge_op_result( T const& lhs, T const& rhs) diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index 34721c75e31..9848aa21f88 100644 --- a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -62,10 +62,10 @@ __global__ void for_all_major_for_all_nbr_low_degree( auto idx = static_cast(tid); while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_degree{}; - auto major_offset = major_start_offset + idx; thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(static_cast(major_offset)); if (local_degree > 0) { diff --git a/cpp/include/patterns/transform_reduce_e.cuh b/cpp/include/patterns/transform_reduce_e.cuh index 1f59777bc35..b95e036d460 100644 --- a/cpp/include/patterns/transform_reduce_e.cuh +++ b/cpp/include/patterns/transform_reduce_e.cuh @@ -41,31 +41,34 @@ int32_t constexpr transform_reduce_e_for_all_block_size = 128; template __global__ void for_all_major_for_all_nbr_low_degree( matrix_partition_device_t matrix_partition, + typename GraphViewType::vertex_type major_first, + typename GraphViewType::vertex_type major_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, - BlockResultIterator block_result_first, + ResultIterator result_iter /* size 1 */, EdgeOp e_op) { using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; using weight_t = typename GraphViewType::weight_type; - using e_op_result_t = typename std::iterator_traits::value_type; + using e_op_result_t = typename std::iterator_traits::value_type; - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - size_t idx = static_cast(tid); + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); + size_t idx = static_cast(tid); e_op_result_t e_op_result_sum{}; - while (idx < static_cast(matrix_partition.get_major_size())) { + while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_degree{}; - thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(idx); -#if 1 - auto sum = thrust::transform_reduce( + thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); + auto sum = thrust::transform_reduce( thrust::seq, 
thrust::make_counting_iterator(edge_t{0}), thrust::make_counting_iterator(local_degree), @@ -104,9 +107,112 @@ __global__ void for_all_major_for_all_nbr_low_degree( [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); }); e_op_result_sum = plus_edge_op_result(e_op_result_sum, sum); -#else - // FIXME: delete this once we verify that the code above is not slower than this. - for (vertex_t i = 0; i < local_degree; ++i) { + idx += gridDim.x * blockDim.x; + } + + e_op_result_sum = + block_reduce_edge_op_result().compute( + e_op_result_sum); + if (threadIdx.x == 0) { atomic_accumulate_edge_op_result(result_iter, e_op_result_sum); } +} + +template +__global__ void for_all_major_for_all_nbr_mid_degree( + matrix_partition_device_t matrix_partition, + typename GraphViewType::vertex_type major_first, + typename GraphViewType::vertex_type major_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + ResultIterator result_iter /* size 1 */, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using e_op_result_t = typename std::iterator_traits::value_type; + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(transform_reduce_e_for_all_block_size % raft::warp_size() == 0); + auto const lane_id = tid % raft::warp_size(); + auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); + size_t idx = static_cast(tid / raft::warp_size()); + + e_op_result_t e_op_result_sum{}; + while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_degree{}; + thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); + for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { + auto minor = indices[i]; + auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + auto row = GraphViewType::is_adj_matrix_transposed + ? minor + : matrix_partition.get_major_from_major_offset_nocheck(idx); + auto col = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_from_major_offset_nocheck(idx) + : minor; + auto row_offset = + GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); + auto col_offset = + GraphViewType::is_adj_matrix_transposed ? 
static_cast(idx) : minor_offset; + auto e_op_result = evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + e_op_result_sum = plus_edge_op_result(e_op_result_sum, e_op_result); + } + idx += gridDim.x * (blockDim.x / raft::warp_size()); + } + + e_op_result_sum = + block_reduce_edge_op_result().compute( + e_op_result_sum); + if (threadIdx.x == 0) { atomic_accumulate_edge_op_result(result_iter, e_op_result_sum); } +} + +template +__global__ void for_all_major_for_all_nbr_high_degree( + matrix_partition_device_t matrix_partition, + typename GraphViewType::vertex_type major_first, + typename GraphViewType::vertex_type major_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + ResultIterator result_iter /* size 1 */, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using e_op_result_t = typename std::iterator_traits::value_type; + + auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); + size_t idx = static_cast(blockIdx.x); + + e_op_result_t e_op_result_sum{}; + while (idx < static_cast(major_last - major_first)) { + auto major_offset = major_start_offset + idx; + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_degree{}; + thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); + for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { auto minor = indices[i]; auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); @@ -132,14 +238,13 @@ __global__ void for_all_major_for_all_nbr_low_degree( e_op); e_op_result_sum = plus_edge_op_result(e_op_result_sum, e_op_result); } -#endif - idx += gridDim.x * blockDim.x; + idx += gridDim.x; } e_op_result_sum = block_reduce_edge_op_result().compute( e_op_result_sum); - if (threadIdx.x == 0) { *(block_result_first + blockIdx.x) = e_op_result_sum; } + if (threadIdx.x == 0) { atomic_accumulate_edge_op_result(result_iter, e_op_result_sum); } } } // namespace detail @@ -190,51 +295,106 @@ T transform_reduce_e(raft::handle_t const& handle, using vertex_t = typename GraphViewType::vertex_type; - T result{}; + auto result_buffer = allocate_dataframe_buffer(1, handle.get_stream()); + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + get_dataframe_buffer_begin(result_buffer), + get_dataframe_buffer_begin(result_buffer) + 1, + T{}); + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { matrix_partition_device_t matrix_partition(graph_view, i); - if (matrix_partition.get_major_size() > 0) { - auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed - ? vertex_t{0} - : matrix_partition.get_major_value_start_offset(); - auto col_value_input_offset = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_value_start_offset() - : vertex_t{0}; + auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? vertex_t{0} + : matrix_partition.get_major_value_start_offset(); + auto col_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? 
matrix_partition.get_major_value_start_offset() + : vertex_t{0}; + auto segment_offsets = graph_view.get_local_adj_matrix_partition_segment_offsets(i); + if (segment_offsets.size() > 0) { + // FIXME: we may further improve performance by 1) concurrently running kernels on different + // segments; 2) individually tuning block sizes for different segments; and 3) adding one more + // segment for very high degree vertices and running segmented reduction + static_assert(detail::num_segments_per_vertex_partition == 3); + if (segment_offsets[1] > 0) { + raft::grid_1d_block_t update_grid(segment_offsets[1], + detail::transform_reduce_e_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); - raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), + detail::for_all_major_for_all_nbr_high_degree<<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_first() + segment_offsets[1], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + get_dataframe_buffer_begin(result_buffer), + e_op); + } + if (segment_offsets[2] - segment_offsets[1] > 0) { + raft::grid_1d_warp_t update_grid(segment_offsets[2] - segment_offsets[1], detail::transform_reduce_e_for_all_block_size, handle.get_device_properties().maxGridSize[0]); - auto block_result_buffer = - allocate_dataframe_buffer(update_grid.num_blocks, handle.get_stream()); - - detail::for_all_major_for_all_nbr_low_degree<<>>( - matrix_partition, - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - get_dataframe_buffer_begin(block_result_buffer), - e_op); - - // FIXME: we have several options to implement this. With cooperative group support - // (https://devblogs.nvidia.com/cooperative-groups/), we can run this synchronization within - // the previous kernel. 
Using atomics at the end of the previous kernel is another option - // (sequentialization due to atomics may not be bad as different blocks may reach the - // synchronization point in varying timings and the number of SMs is not very big) - auto partial_result = - thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - get_dataframe_buffer_begin(block_result_buffer), - get_dataframe_buffer_begin(block_result_buffer) + update_grid.num_blocks, - T(), - [] __device__(T lhs, T rhs) { return plus_edge_op_result(lhs, rhs); }); - - result = plus_edge_op_result(result, partial_result); + detail::for_all_major_for_all_nbr_mid_degree<<>>( + matrix_partition, + matrix_partition.get_major_first() + segment_offsets[1], + matrix_partition.get_major_first() + segment_offsets[2], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + get_dataframe_buffer_begin(result_buffer), + e_op); + } + if (segment_offsets[3] - segment_offsets[2] > 0) { + raft::grid_1d_thread_t update_grid(segment_offsets[3] - segment_offsets[2], + detail::transform_reduce_e_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_major_for_all_nbr_low_degree<<>>( + matrix_partition, + matrix_partition.get_major_first() + segment_offsets[2], + matrix_partition.get_major_last(), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + get_dataframe_buffer_begin(result_buffer), + e_op); + } + } else { + if (matrix_partition.get_major_size() > 0) { + raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), + detail::transform_reduce_e_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_major_for_all_nbr_low_degree<<>>( + matrix_partition, + matrix_partition.get_major_first(), + matrix_partition.get_major_last(), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + get_dataframe_buffer_begin(result_buffer), + e_op); + } } } + auto result = + thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + get_dataframe_buffer_begin(result_buffer), + get_dataframe_buffer_begin(result_buffer) + 1, + T{}, + [] __device__(T lhs, T rhs) { return plus_edge_op_result(lhs, rhs); }); + if (GraphViewType::is_multi_gpu) { result = host_scalar_allreduce(handle.get_comms(), result, handle.get_stream()); } diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh index 4d557b97a30..3d87f19969e 100644 --- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -15,7 +15,6 @@ */ #pragma once -#include #include #include #include @@ -37,13 +36,15 @@ #include #include #include -#include +#include +#include #include #include #include #include #include +#include #include #include #include @@ -55,9 +56,7 @@ namespace experimental { namespace detail { -// FIXME: block size requires tuning -int32_t constexpr update_frontier_v_push_if_out_nbr_for_all_block_size = 128; -int32_t constexpr update_frontier_v_push_if_out_nbr_update_block_size = 128; +int32_t constexpr update_frontier_v_push_if_out_nbr_for_all_block_size = 512; template (thrust::distance(row_first, row_last)); auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - size_t idx = tid; + auto idx = static_cast(tid); - while (idx < 
num_rows) { + while (idx < static_cast(thrust::distance(row_first, row_last))) { vertex_t row = *(row_first + idx); auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_out_degree{}; thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); - for (vertex_t i = 0; i < local_out_degree; ++i) { + for (edge_t i = 0; i < local_out_degree; ++i) { auto col = indices[i]; auto weight = weights != nullptr ? weights[i] : 1.0; auto col_offset = matrix_partition.get_minor_offset_from_minor_nocheck(col); @@ -125,12 +123,145 @@ __global__ void for_all_frontier_row_for_all_nbr_low_degree( } } +template +__global__ void for_all_frontier_row_for_all_nbr_mid_degree( + matrix_partition_device_t matrix_partition, + RowIterator row_first, + RowIterator row_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + BufferKeyOutputIterator buffer_key_output_first, + BufferPayloadOutputIterator buffer_payload_output_first, + size_t* buffer_idx_ptr, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(update_frontier_v_push_if_out_nbr_for_all_block_size % raft::warp_size() == 0); + auto const lane_id = tid % raft::warp_size(); + auto idx = static_cast(tid / raft::warp_size()); + + while (idx < static_cast(thrust::distance(row_first, row_last))) { + vertex_t row = *(row_first + idx); + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_out_degree{}; + thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); + for (edge_t i = lane_id; i < local_out_degree; i += raft::warp_size()) { + auto col = indices[i]; + auto weight = weights != nullptr ? weights[i] : 1.0; + auto col_offset = matrix_partition.get_minor_offset_from_minor_nocheck(col); + auto e_op_result = evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + if (thrust::get<0>(e_op_result) == true) { + // FIXME: This atomicAdd serializes execution. If we renumber vertices to insure that rows + // within a partition are sorted by their out-degree in decreasing order, we can compute + // a tight uppper bound for the maximum number of pushes per warp/block and use shared + // memory buffer to reduce the number of atomicAdd operations. 
+ static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + auto buffer_idx = atomicAdd(reinterpret_cast(buffer_idx_ptr), + static_cast(1)); + *(buffer_key_output_first + buffer_idx) = col; + *(buffer_payload_output_first + buffer_idx) = thrust::get<1>(e_op_result); + } + } + + idx += gridDim.x * (blockDim.x / raft::warp_size()); + } +} + +template +__global__ void for_all_frontier_row_for_all_nbr_high_degree( + matrix_partition_device_t matrix_partition, + RowIterator row_first, + RowIterator row_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + BufferKeyOutputIterator buffer_key_output_first, + BufferPayloadOutputIterator buffer_payload_output_first, + size_t* buffer_idx_ptr, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + + auto idx = static_cast(blockIdx.x); + + while (idx < static_cast(thrust::distance(row_first, row_last))) { + vertex_t row = *(row_first + idx); + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_out_degree{}; + thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); + for (edge_t i = threadIdx.x; i < local_out_degree; i += blockDim.x) { + auto col = indices[i]; + auto weight = weights != nullptr ? weights[i] : 1.0; + auto col_offset = matrix_partition.get_minor_offset_from_minor_nocheck(col); + auto e_op_result = evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + if (thrust::get<0>(e_op_result) == true) { + // FIXME: This atomicAdd serializes execution. If we renumber vertices to insure that rows + // within a partition are sorted by their out-degree in decreasing order, we can compute + // a tight uppper bound for the maximum number of pushes per warp/block and use shared + // memory buffer to reduce the number of atomicAdd operations. 
+ static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + auto buffer_idx = atomicAdd(reinterpret_cast(buffer_idx_ptr), + static_cast(1)); + *(buffer_key_output_first + buffer_idx) = col; + *(buffer_payload_output_first + buffer_idx) = thrust::get<1>(e_op_result); + } + } + + idx += gridDim.x; + } +} + template -size_t reduce_buffer_elements(raft::handle_t const& handle, - BufferKeyOutputIterator buffer_key_output_first, - BufferPayloadOutputIterator buffer_payload_output_first, - size_t num_buffer_elements, - ReduceOp reduce_op) +size_t sort_and_reduce_buffer_elements(raft::handle_t const& handle, + BufferKeyOutputIterator buffer_key_output_first, + BufferPayloadOutputIterator buffer_payload_output_first, + size_t num_buffer_elements, + ReduceOp reduce_op) { thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), buffer_key_output_first, @@ -182,92 +313,6 @@ size_t reduce_buffer_elements(raft::handle_t const& handle, } } -template -__global__ void update_frontier_and_vertex_output_values( - vertex_partition_device_t vertex_partition, - BufferKeyInputIterator buffer_key_input_first, - BufferPayloadInputIterator buffer_payload_input_first, - size_t num_buffer_elements, - VertexValueInputIterator vertex_value_input_first, - VertexValueOutputIterator vertex_value_output_first, - vertex_t** bucket_ptrs, - size_t* bucket_sizes_ptr, - size_t invalid_bucket_idx, - vertex_t invalid_vertex, - VertexOp v_op) -{ - static_assert(std::is_same::value_type, - vertex_t>::value); - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - size_t idx = tid; - size_t block_idx = blockIdx.x; - // FIXME: it might be more performant to process more than one element per thread - auto num_blocks = (num_buffer_elements + blockDim.x - 1) / blockDim.x; - - using BlockScan = - cub::BlockScan; - __shared__ typename BlockScan::TempStorage temp_storage; - - __shared__ size_t bucket_block_start_offsets[num_buckets]; - - size_t bucket_block_local_offsets[num_buckets]; - size_t bucket_block_aggregate_sizes[num_buckets]; - - while (block_idx < num_blocks) { - for (size_t i = 0; i < num_buckets; ++i) { bucket_block_local_offsets[i] = 0; } - - size_t selected_bucket_idx{invalid_bucket_idx}; - vertex_t key{invalid_vertex}; - - if (idx < num_buffer_elements) { - key = *(buffer_key_input_first + idx); - auto key_offset = vertex_partition.get_local_vertex_offset_from_vertex_nocheck(key); - auto v_val = *(vertex_value_input_first + key_offset); - auto payload = *(buffer_payload_input_first + idx); - auto v_op_result = v_op(v_val, payload); - selected_bucket_idx = thrust::get<0>(v_op_result); - if (selected_bucket_idx != invalid_bucket_idx) { - *(vertex_value_output_first + key_offset) = thrust::get<1>(v_op_result); - bucket_block_local_offsets[selected_bucket_idx] = 1; - } - } - - for (size_t i = 0; i < num_buckets; ++i) { - BlockScan(temp_storage) - .ExclusiveSum(bucket_block_local_offsets[i], - bucket_block_local_offsets[i], - bucket_block_aggregate_sizes[i]); - } - - if (threadIdx.x == 0) { - for (size_t i = 0; i < num_buckets; ++i) { - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - bucket_block_start_offsets[i] = - atomicAdd(reinterpret_cast(bucket_sizes_ptr + i), - static_cast(bucket_block_aggregate_sizes[i])); - } - } - - __syncthreads(); - - // FIXME: better use shared memory buffer to aggreaget global memory writes - if (selected_bucket_idx != invalid_bucket_idx) { - bucket_ptrs[selected_bucket_idx][bucket_block_start_offsets[selected_bucket_idx] + - 
bucket_block_local_offsets[selected_bucket_idx]] = key; - } - - idx += gridDim.x * blockDim.x; - block_idx += gridDim.x; - } -} - } // namespace detail /** @@ -289,10 +334,12 @@ __global__ void update_frontier_and_vertex_output_values( * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. - * @param vertex_first Iterator pointing to the first (inclusive) vertex in the current frontier. v - * in [vertex_first, vertex_last) should be distinct (and should belong to this process in - * multi-GPU), otherwise undefined behavior - * @param vertex_last Iterator pointing to the last (exclusive) vertex in the current frontier. + * @param vertex_frontier VertexFrontier class object for vertex frontier managements. This object + * includes multiple bucket objects. + * @param cur_fontier_bucket_idx Index of the VertexFrontier bucket holding vertices for the current + * iteration. + * @param next_frontier_bucket_indices Indices of the VertexFrontier buckets to store new frontier + * vertices for the next iteration. * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input * properties for the first (inclusive) row (assigned to this process in multi-GPU). * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + @@ -314,35 +361,33 @@ __global__ void update_frontier_and_vertex_output_values( * (inclusive) vertex (assigned to tihs process in multi-GPU). `vertex_value_output_last` * (exclusive) is deduced as @p vertex_value_output_first + @p * graph_view.get_number_of_local_vertices(). - * @param vertex_frontier vertex frontier class object for vertex frontier managements. This object - * includes multiple bucket objects. * @param v_op Binary operator takes *(@p vertex_value_input_first + i) (where i is [0, @p * graph_view.get_number_of_local_vertices())) and reduced value of the @p e_op outputs for * this vertex and returns the target bucket index (for frontier update) and new verrtex property - * values (to update *(@p vertex_value_output_first + i)). + * values (to update *(@p vertex_value_output_first + i)). The target bucket index should either be + * VertexFrontier::kInvalidBucketIdx or an index in @p next_frontier_bucket_indices. */ template void update_frontier_v_push_if_out_nbr( raft::handle_t const& handle, GraphViewType const& graph_view, - VertexIterator vertex_first, - VertexIterator vertex_last, + VertexFrontierType& vertex_frontier, + size_t cur_frontier_bucket_idx, + std::vector const& next_frontier_bucket_indices, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, EdgeOp e_op, ReduceOp reduce_op, VertexValueInputIterator vertex_value_input_first, VertexValueOutputIterator vertex_value_output_first, - VertexFrontierType& vertex_frontier, VertexOp v_op) { static_assert(!GraphViewType::is_adj_matrix_transposed, @@ -353,6 +398,9 @@ void update_frontier_v_push_if_out_nbr( using weight_t = typename GraphViewType::weight_type; using payload_t = typename ReduceOp::type; + auto cur_frontier_vertex_first = vertex_frontier.get_bucket(cur_frontier_bucket_idx).begin(); + auto cur_frontier_vertex_last = vertex_frontier.get_bucket(cur_frontier_bucket_idx).end(); + // 1. 
fill the buffer rmm::device_uvector keys(size_t{0}, handle.get_stream()); @@ -361,57 +409,55 @@ void update_frontier_v_push_if_out_nbr( for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { matrix_partition_device_t matrix_partition(graph_view, i); - rmm::device_uvector frontier_rows( - 0, handle.get_stream()); // relevant only if GraphViewType::is_multi_gpu is true - - size_t frontier_size{}; + rmm::device_uvector frontier_rows(0, handle.get_stream()); if (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); - auto const row_comm_size = row_comm.get_size(); auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); - auto sub_comm_rank = col_comm_rank; - frontier_size = host_scalar_bcast(col_comm, - (static_cast(sub_comm_rank) == i) - ? thrust::distance(vertex_first, vertex_last) - : size_t{0}, - i, - handle.get_stream()); - if (static_cast(sub_comm_rank) != i) { - frontier_rows.resize(frontier_size, handle.get_stream()); + auto frontier_size = + host_scalar_bcast(col_comm, + (static_cast(col_comm_rank) == i) + ? thrust::distance(cur_frontier_vertex_first, cur_frontier_vertex_last) + : size_t{0} /* dummy */, + i, + handle.get_stream()); + frontier_rows.resize(frontier_size, handle.get_stream()); + + if (static_cast(col_comm_rank) == i) { + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + cur_frontier_vertex_first, + cur_frontier_vertex_last, + frontier_rows.begin()); } - device_bcast( - col_comm, vertex_first, frontier_rows.begin(), frontier_size, i, handle.get_stream()); + + device_bcast(col_comm, + cur_frontier_vertex_first, + frontier_rows.begin(), + frontier_size, + i, + handle.get_stream()); } else { - frontier_size = thrust::distance(vertex_first, vertex_last); + frontier_rows.resize(thrust::distance(cur_frontier_vertex_first, cur_frontier_vertex_last), + handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + cur_frontier_vertex_first, + cur_frontier_vertex_last, + frontier_rows.begin()); } - auto max_pushes = - frontier_size > 0 - ? frontier_rows.size() > 0 - ? thrust::transform_reduce( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - frontier_rows.begin(), - frontier_rows.end(), - [matrix_partition] __device__(auto row) { - auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); - return matrix_partition.get_local_degree(row_offset); - }, - edge_t{0}, - thrust::plus()) - : thrust::transform_reduce( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_first, - vertex_last, - [matrix_partition] __device__(auto row) { - auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); - return matrix_partition.get_local_degree(row_offset); - }, - edge_t{0}, - thrust::plus()) - : edge_t{0}; + auto max_pushes = frontier_rows.size() > 0 + ? 
thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + frontier_rows.begin(), + frontier_rows.end(), + [matrix_partition] __device__(auto row) { + auto row_offset = + matrix_partition.get_major_offset_from_major_nocheck(row); + return matrix_partition.get_local_degree(row_offset); + }, + edge_t{0}, + thrust::plus()) + : edge_t{0}; // FIXME: This is highly pessimistic for single GPU (and multi-GPU as well if we maintain // additional per column data for filtering in e_op). If we can pause & resume execution if @@ -433,23 +479,80 @@ void update_frontier_v_push_if_out_nbr( auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed ? vertex_t{0} : matrix_partition.get_major_value_start_offset(); - - // FIXME: This is highly inefficeint for graphs with high-degree vertices. If we renumber - // vertices to insure that rows within a partition are sorted by their out-degree in decreasing - // order, we will apply this kernel only to low out-degree vertices. - if (frontier_size > 0) { - raft::grid_1d_thread_t for_all_low_degree_grid( - frontier_size, - detail::update_frontier_v_push_if_out_nbr_for_all_block_size, - handle.get_device_properties().maxGridSize[0]); - - if (frontier_rows.size() > 0) { - detail::for_all_frontier_row_for_all_nbr_low_degree<< 0) { + static_assert(detail::num_segments_per_vertex_partition == 3); + std::vector h_thresholds(detail::num_segments_per_vertex_partition - 1); + h_thresholds[0] = matrix_partition.get_major_first() + segment_offsets[1]; + h_thresholds[1] = matrix_partition.get_major_first() + segment_offsets[2]; + rmm::device_uvector d_thresholds(h_thresholds.size(), handle.get_stream()); + raft::update_device( + d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), handle.get_stream()); + rmm::device_uvector d_offsets(d_thresholds.size(), handle.get_stream()); + thrust::lower_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + frontier_rows.begin(), + frontier_rows.end(), + d_thresholds.begin(), + d_thresholds.end(), + d_offsets.begin()); + std::vector h_offsets(d_offsets.size()); + raft::update_host(h_offsets.data(), d_offsets.data(), d_offsets.size(), handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + // FIXME: we may further improve performance by 1) concurrently running kernels on different + // segments; 2) individually tuning block sizes for different segments; and 3) adding one more + // segment for very high degree vertices and running segmented reduction + if (h_offsets[0] > 0) { + raft::grid_1d_block_t update_grid( + h_offsets[0], + detail::update_frontier_v_push_if_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_frontier_row_for_all_nbr_high_degree<<>>( + matrix_partition, + frontier_rows.begin(), + frontier_rows.begin() + h_offsets[0], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first, + keys.begin(), + get_dataframe_buffer_begin(payload_buffer), + buffer_idx.data(), + e_op); + } + if (h_offsets[1] - h_offsets[0] > 0) { + raft::grid_1d_warp_t update_grid( + h_offsets[1] - h_offsets[0], + detail::update_frontier_v_push_if_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_frontier_row_for_all_nbr_mid_degree<<>>( matrix_partition, - frontier_rows.begin(), + frontier_rows.begin() + h_offsets[0], + frontier_rows.begin() + h_offsets[1], + adj_matrix_row_value_input_first + row_value_input_offset, + 
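    // Worked example with assumed values: if this partition's segment_offsets are {0, 3, 10, 100},
    // then (because vertices are renumbered in order of decreasing degree) vertex IDs
    // [major_first, major_first + 3) form the high-degree segment, [+3, +10) the mid-degree
    // segment, and [+10, +100) the low-degree segment. thrust::lower_bound of the two thresholds
    // over the sorted frontier_rows might then give h_offsets = {2, 7}, so frontier rows [0, 2)
    // go to the block-per-row (high-degree) kernel, [2, 7) to the warp-per-row (mid-degree)
    // kernel, and [7, frontier_rows.size()) to the thread-per-row (low-degree) kernel; each
    // launch gets a contiguous, already-sorted slice of the frontier.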
adj_matrix_col_value_input_first, + keys.begin(), + get_dataframe_buffer_begin(payload_buffer), + buffer_idx.data(), + e_op); + } + if (frontier_rows.size() - h_offsets[1] > 0) { + raft::grid_1d_thread_t update_grid( + frontier_rows.size() - h_offsets[1], + detail::update_frontier_v_push_if_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_frontier_row_for_all_nbr_low_degree<<>>( + matrix_partition, + frontier_rows.begin() + h_offsets[1], frontier_rows.end(), adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first, @@ -457,14 +560,21 @@ void update_frontier_v_push_if_out_nbr( get_dataframe_buffer_begin(payload_buffer), buffer_idx.data(), e_op); - } else { - detail::for_all_frontier_row_for_all_nbr_low_degree<< 0) { + raft::grid_1d_thread_t update_grid( + frontier_rows.size(), + detail::update_frontier_v_push_if_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_frontier_row_for_all_nbr_low_degree<<>>( matrix_partition, - vertex_first, - vertex_last, + frontier_rows.begin(), + frontier_rows.end(), adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first, keys.begin(), @@ -478,12 +588,13 @@ void update_frontier_v_push_if_out_nbr( // 2. reduce the buffer auto num_buffer_elements = - detail::reduce_buffer_elements(handle, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), - buffer_idx.value(handle.get_stream()), - reduce_op); + detail::sort_and_reduce_buffer_elements(handle, + keys.begin(), + get_dataframe_buffer_begin(payload_buffer), + buffer_idx.value(handle.get_stream()), + reduce_op); if (GraphViewType::is_multi_gpu) { + // FIXME: this step is unnecessary if row_comm_size== 1 auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); @@ -533,49 +644,113 @@ void update_frontier_v_push_if_out_nbr( payload_buffer = std::move(rx_payload_buffer); num_buffer_elements = - detail::reduce_buffer_elements(handle, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), - keys.size(), - reduce_op); + detail::sort_and_reduce_buffer_elements(handle, + keys.begin(), + get_dataframe_buffer_begin(payload_buffer), + keys.size(), + reduce_op); } // 3. 
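  // The sort_and_reduce_buffer_elements calls above combine pushes that target the same
  // destination vertex into a single (key, payload) pair. Below is a minimal illustrative sketch
  // of that step, not the patch's exact implementation, assuming plain thrust::device_vector
  // buffers and a commutative, associative reduce_op:
  #include <thrust/device_vector.h>
  #include <thrust/functional.h>
  #include <thrust/reduce.h>
  #include <thrust/sort.h>

  template <typename vertex_t, typename payload_t, typename ReduceOp>
  size_t combine_pushes_sketch(thrust::device_vector<vertex_t>& keys,
                               thrust::device_vector<payload_t>& payloads,
                               ReduceOp reduce_op)
  {
    // group pushes by destination vertex
    thrust::sort_by_key(keys.begin(), keys.end(), payloads.begin());
    thrust::device_vector<vertex_t> out_keys(keys.size());
    thrust::device_vector<payload_t> out_payloads(payloads.size());
    // keep one pair per vertex, merging payloads with reduce_op
    auto ends = thrust::reduce_by_key(keys.begin(),
                                      keys.end(),
                                      payloads.begin(),
                                      out_keys.begin(),
                                      out_payloads.begin(),
                                      thrust::equal_to<vertex_t>{},
                                      reduce_op);
    auto num_elements = static_cast<size_t>(thrust::distance(out_keys.begin(), ends.first));
    out_keys.resize(num_elements);
    out_payloads.resize(num_elements);
    keys.swap(out_keys);
    payloads.swap(out_payloads);
    return num_elements;
  }
  // usage: auto n = combine_pushes_sketch(keys, payloads, thrust::plus<float>{});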
update vertex properties if (num_buffer_elements > 0) { - raft::grid_1d_thread_t update_grid(num_buffer_elements, - detail::update_frontier_v_push_if_out_nbr_update_block_size, - handle.get_device_properties().maxGridSize[0]); - - auto constexpr invalid_vertex = invalid_vertex_id::value; + static_assert(VertexFrontierType::kNumBuckets <= std::numeric_limits::max()); + rmm::device_uvector bucket_indices(num_buffer_elements, handle.get_stream()); vertex_partition_device_t vertex_partition(graph_view); - auto bucket_and_bucket_size_device_ptrs = - vertex_frontier.get_bucket_and_bucket_size_device_pointers(); - detail::update_frontier_and_vertex_output_values - <<>>( - vertex_partition, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), - num_buffer_elements, - vertex_value_input_first, - vertex_value_output_first, - std::get<0>(bucket_and_bucket_size_device_ptrs), - std::get<1>(bucket_and_bucket_size_device_ptrs), - VertexFrontierType::kInvalidBucketIdx, - invalid_vertex, - v_op); - - auto bucket_sizes_device_ptr = std::get<1>(bucket_and_bucket_size_device_ptrs); - std::vector bucket_sizes(VertexFrontierType::kNumBuckets); - raft::update_host(bucket_sizes.data(), - bucket_sizes_device_ptr, - VertexFrontierType::kNumBuckets, - handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - for (size_t i = 0; i < VertexFrontierType::kNumBuckets; ++i) { - vertex_frontier.get_bucket(i).set_size(bucket_sizes[i]); + auto key_payload_pair_first = thrust::make_zip_iterator( + thrust::make_tuple(keys.begin(), get_dataframe_buffer_begin(payload_buffer))); + thrust::transform( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + key_payload_pair_first, + key_payload_pair_first + num_buffer_elements, + bucket_indices.begin(), + [vertex_value_input_first, + vertex_value_output_first, + v_op, + vertex_partition, + invalid_bucket_idx = VertexFrontierType::kInvalidBucketIdx] __device__(auto pair) { + auto key = thrust::get<0>(pair); + auto payload = thrust::get<1>(pair); + auto key_offset = vertex_partition.get_local_vertex_offset_from_vertex_nocheck(key); + auto v_val = *(vertex_value_input_first + key_offset); + auto v_op_result = v_op(v_val, payload); + auto bucket_idx = thrust::get<0>(v_op_result); + if (bucket_idx != invalid_bucket_idx) { + *(vertex_value_output_first + key_offset) = thrust::get<1>(v_op_result); + return static_cast(bucket_idx); + } else { + return std::numeric_limits::max(); + } + }); + + resize_dataframe_buffer(payload_buffer, size_t{0}, handle.get_stream()); + shrink_to_fit_dataframe_buffer(payload_buffer, handle.get_stream()); + + auto bucket_key_pair_first = + thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), keys.begin())); + keys.resize(thrust::distance( + bucket_key_pair_first, + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + bucket_key_pair_first, + bucket_key_pair_first + num_buffer_elements, + [] __device__(auto pair) { + return thrust::get<0>(pair) == + std::numeric_limits::max(); + })), + handle.get_stream()); + bucket_indices.resize(keys.size(), handle.get_stream()); + keys.shrink_to_fit(handle.get_stream()); + bucket_indices.shrink_to_fit(handle.get_stream()); + + bucket_key_pair_first = + thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), keys.begin())); + if (next_frontier_bucket_indices.size() == 1) { + vertex_frontier.get_bucket(next_frontier_bucket_indices[0]).insert(keys.begin(), keys.size()); + } else if (next_frontier_bucket_indices.size() == 2) { + 
auto first_bucket_size = thrust::distance( + bucket_key_pair_first, + thrust::stable_partition( // stalbe_partition to maintain sorted order within each bucket + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + bucket_key_pair_first, + bucket_key_pair_first + bucket_indices.size(), + [first_bucket_idx = static_cast(next_frontier_bucket_indices[0])] __device__( + auto pair) { return thrust::get<0>(pair) == first_bucket_idx; })); + vertex_frontier.get_bucket(next_frontier_bucket_indices[0]) + .insert(keys.begin(), first_bucket_size); + vertex_frontier.get_bucket(next_frontier_bucket_indices[1]) + .insert(keys.begin() + first_bucket_size, + thrust::distance(keys.begin() + first_bucket_size, keys.end())); + } else { + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + bucket_key_pair_first, + bucket_key_pair_first + bucket_indices.size()); + rmm::device_uvector d_indices(next_frontier_bucket_indices.size(), + handle.get_stream()); + rmm::device_uvector d_counts(d_indices.size(), handle.get_stream()); + auto it = + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + bucket_indices.begin(), + bucket_indices.end(), + thrust::make_constant_iterator(size_t{1}), + d_indices.begin(), + d_counts.begin()); + d_indices.resize(thrust::distance(d_indices.begin(), thrust::get<0>(it)), + handle.get_stream()); + d_counts.resize(d_indices.size(), handle.get_stream()); + std::vector h_indices(d_indices.size()); + std::vector h_counts(h_indices.size()); + raft::update_host(h_indices.data(), d_indices.data(), d_indices.size(), handle.get_stream()); + raft::update_host(h_counts.data(), d_counts.data(), d_counts.size(), handle.get_stream()); + handle.get_stream_view().synchronize(); + std::vector h_offsets(h_indices.size(), 0); + std::partial_sum(h_counts.begin(), h_counts.end() - 1, h_offsets.begin() + 1); + for (size_t i = 0; i < h_indices.size(); ++i) { + if (h_counts[i] > 0) { + vertex_frontier.get_bucket(h_indices[i]).insert(keys.begin() + h_offsets[i], h_counts[i]); + } + } } } } diff --git a/cpp/include/patterns/vertex_frontier.cuh b/cpp/include/patterns/vertex_frontier.cuh index 375ec097850..4758334e9fc 100644 --- a/cpp/include/patterns/vertex_frontier.cuh +++ b/cpp/include/patterns/vertex_frontier.cuh @@ -24,8 +24,7 @@ #include #include -#include -#include +#include #include #include @@ -37,129 +36,80 @@ namespace cugraph { namespace experimental { -namespace detail { - -// FIXME: block size requires tuning -int32_t constexpr move_and_invalidate_if_block_size = 128; - -// FIXME: better move to another file for reusability -inline size_t round_up(size_t number_to_round, size_t modulus) -{ - return ((number_to_round + (modulus - 1)) / modulus) * modulus; -} - -template -__global__ void move_and_invalidate_if(RowIterator row_first, - RowIterator row_last, - vertex_t** bucket_ptrs, - size_t* bucket_sizes_ptr, - size_t this_bucket_idx, - size_t invalid_bucket_idx, - vertex_t invalid_vertex, - SplitOp split_op) -{ - static_assert( - std::is_same::value_type, vertex_t>::value); - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - size_t idx = tid; - size_t block_idx = blockIdx.x; - auto num_elements = thrust::distance(row_first, row_last); - // FIXME: it might be more performant to process more than one element per thread - auto num_blocks = (num_elements + blockDim.x - 1) / blockDim.x; - - using BlockScan = cub::BlockScan; - __shared__ typename BlockScan::TempStorage temp_storage; - - __shared__ size_t 
bucket_block_start_offsets[num_buckets]; - - size_t bucket_block_local_offsets[num_buckets]; - size_t bucket_block_aggregate_sizes[num_buckets]; - - while (block_idx < num_blocks) { - for (size_t i = 0; i < num_buckets; ++i) { bucket_block_local_offsets[i] = 0; } - - size_t selected_bucket_idx{invalid_bucket_idx}; - vertex_t key{invalid_vertex}; - - if (idx < num_elements) { - key = *(row_first + idx); - selected_bucket_idx = split_op(key); - if (selected_bucket_idx != this_bucket_idx) { - *(row_first + idx) = invalid_vertex; - if (selected_bucket_idx != invalid_bucket_idx) { - bucket_block_local_offsets[selected_bucket_idx] = 1; - } - } - } - - for (size_t i = 0; i < num_buckets; ++i) { - BlockScan(temp_storage) - .ExclusiveSum(bucket_block_local_offsets[i], - bucket_block_local_offsets[i], - bucket_block_aggregate_sizes[i]); - } - - if (threadIdx.x == 0) { - for (size_t i = 0; i < num_buckets; ++i) { - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - bucket_block_start_offsets[i] = - atomicAdd(reinterpret_cast(bucket_sizes_ptr + i), - static_cast(bucket_block_aggregate_sizes[i])); - } - } - - __syncthreads(); - - // FIXME: better use shared memory buffer to aggreaget global memory writes - if ((selected_bucket_idx != this_bucket_idx) && (selected_bucket_idx != invalid_bucket_idx)) { - bucket_ptrs[selected_bucket_idx][bucket_block_start_offsets[selected_bucket_idx] + - bucket_block_local_offsets[selected_bucket_idx]] = key; - } - - idx += gridDim.x * blockDim.x; - block_idx += gridDim.x; - } -} - -} // namespace detail - template -class Bucket { +class SortedUniqueElementBucket { public: - Bucket(raft::handle_t const& handle, size_t capacity) - : handle_ptr_(&handle), elements_(capacity, handle.get_stream()) + SortedUniqueElementBucket(raft::handle_t const& handle) + : handle_ptr_(&handle), elements_(0, handle.get_stream()) { - thrust::fill(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - elements_.begin(), - elements_.end(), - invalid_vertex_id::value); } void insert(vertex_t v) { - raft::update_device(elements_.data() + size_, &v, 1, handle_ptr_->get_stream()); - ++size_; + if (elements_.size() > 0) { + rmm::device_scalar vertex(v, handle_ptr_->get_stream()); + insert(vertex.data(), vertex_t{1}); + } else { + elements_.resize(1, handle_ptr_->get_stream()); + raft::update_device(elements_.data(), &v, size_t{1}, handle_ptr_->get_stream()); + } } - size_t size() const { return size_; } + /** + * @ brief insert a list of vertices to the bucket + * + * @param sorted_unique_vertices Device pointer to the array storing the vertex list. + * @param num_sorted_unique_vertices Size of the vertex list to insert. 
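   *
   * Illustrative usage (vertex values assumed):
   * @code
   *   // sorted_unique holds {2, 5, 9}: already sorted, no duplicates
   *   std::vector<int32_t> h_vals{2, 5, 9};
   *   rmm::device_uvector<int32_t> sorted_unique(h_vals.size(), handle.get_stream());
   *   raft::update_device(sorted_unique.data(), h_vals.data(), h_vals.size(), handle.get_stream());
   *   bucket.insert(sorted_unique.data(), static_cast<int32_t>(sorted_unique.size()));
   * @endcode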
+ */ + void insert(vertex_t const* sorted_unique_vertices, vertex_t num_sorted_unique_vertices) + { + if (elements_.size() > 0) { + rmm::device_uvector merged_vertices(elements_.size() + num_sorted_unique_vertices, + handle_ptr_->get_stream()); + thrust::merge(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + elements_.begin(), + elements_.end(), + sorted_unique_vertices, + sorted_unique_vertices + num_sorted_unique_vertices, + merged_vertices.begin()); + merged_vertices.resize( + thrust::distance( + merged_vertices.begin(), + thrust::unique(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + merged_vertices.begin(), + merged_vertices.end())), + handle_ptr_->get_stream()); + merged_vertices.shrink_to_fit(handle_ptr_->get_stream()); + elements_ = std::move(merged_vertices); + } else { + elements_.resize(num_sorted_unique_vertices, handle_ptr_->get_stream()); + thrust::copy(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + sorted_unique_vertices, + sorted_unique_vertices + num_sorted_unique_vertices, + elements_.begin()); + } + } - void set_size(size_t size) { size_ = size; } + size_t size() const { return elements_.size(); } template std::enable_if_t aggregate_size() const { - return host_scalar_allreduce(handle_ptr_->get_comms(), size_, handle_ptr_->get_stream()); + return host_scalar_allreduce( + handle_ptr_->get_comms(), elements_.size(), handle_ptr_->get_stream()); } template std::enable_if_t aggregate_size() const { - return size_; + return elements_.size(); } - void clear() { size_ = 0; } + void resize(size_t size) { elements_.resize(size, handle_ptr_->get_stream()); } + + void clear() { elements_.resize(0, handle_ptr_->get_stream()); } - size_t capacity() const { return elements_.size(); } + void shrink_to_fit() { elements_.shrink_to_fit(handle_ptr_->get_stream()); } auto const data() const { return elements_.data(); } @@ -169,14 +119,13 @@ class Bucket { auto begin() { return elements_.begin(); } - auto const end() const { return elements_.begin() + size_; } + auto const end() const { return elements_.end(); } - auto end() { return elements_.begin() + size_; } + auto end() { return elements_.end(); } private: raft::handle_t const* handle_ptr_{nullptr}; rmm::device_uvector elements_; - size_t size_{0}; }; template @@ -185,29 +134,17 @@ class VertexFrontier { static size_t constexpr kNumBuckets = num_buckets; static size_t constexpr kInvalidBucketIdx{std::numeric_limits::max()}; - VertexFrontier(raft::handle_t const& handle, std::vector bucket_capacities) - : handle_ptr_(&handle), - tmp_bucket_ptrs_(num_buckets, handle.get_stream()), - tmp_bucket_sizes_(num_buckets, handle.get_stream()) + VertexFrontier(raft::handle_t const& handle) : handle_ptr_(&handle) { - CUGRAPH_EXPECTS(bucket_capacities.size() == num_buckets, - "invalid input argument bucket_capacities (size mismatch)"); - thrust::fill(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - tmp_bucket_ptrs_.begin(), - tmp_bucket_ptrs_.end(), - static_cast(nullptr)); - thrust::fill(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - tmp_bucket_sizes_.begin(), - tmp_bucket_sizes_.end(), - size_t{0}); - for (size_t i = 0; i < num_buckets; ++i) { - buckets_.emplace_back(handle, bucket_capacities[i]); - } + for (size_t i = 0; i < num_buckets; ++i) { buckets_.emplace_back(handle); } } - Bucket& get_bucket(size_t bucket_idx) { return buckets_[bucket_idx]; } + SortedUniqueElementBucket& get_bucket(size_t 
bucket_idx) + { + return buckets_[bucket_idx]; + } - Bucket const& get_bucket(size_t bucket_idx) const + SortedUniqueElementBucket const& get_bucket(size_t bucket_idx) const { return buckets_[bucket_idx]; } @@ -218,78 +155,111 @@ class VertexFrontier { } template - void split_bucket(size_t bucket_idx, SplitOp split_op) + void split_bucket(size_t this_bucket_idx, + std::vector const& move_to_bucket_indices, + SplitOp split_op) { - auto constexpr invalid_vertex = invalid_vertex_id::value; - - auto bucket_and_bucket_size_device_ptrs = get_bucket_and_bucket_size_device_pointers(); - - auto& this_bucket = get_bucket(bucket_idx); + auto& this_bucket = get_bucket(this_bucket_idx); if (this_bucket.size() > 0) { - raft::grid_1d_thread_t move_and_invalidate_if_grid( - this_bucket.size(), - detail::move_and_invalidate_if_block_size, - handle_ptr_->get_device_properties().maxGridSize[0]); - - detail::move_and_invalidate_if - <<get_stream()>>>(this_bucket.begin(), - this_bucket.end(), - std::get<0>(bucket_and_bucket_size_device_ptrs), - std::get<1>(bucket_and_bucket_size_device_ptrs), - bucket_idx, - kInvalidBucketIdx, - invalid_vertex, - split_op); - } + static_assert(kNumBuckets <= std::numeric_limits::max()); + rmm::device_uvector bucket_indices(this_bucket.size(), handle_ptr_->get_stream()); + thrust::transform( + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + this_bucket.begin(), + this_bucket.end(), + bucket_indices.begin(), + [split_op] __device__(auto v) { return static_cast(split_op(v)); }); + + auto pair_first = + thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), this_bucket.begin())); + this_bucket.resize(thrust::distance( + pair_first, + thrust::remove_if( + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + pair_first, + pair_first + bucket_indices.size(), + [invalid_bucket_idx = static_cast(kInvalidBucketIdx)] __device__(auto pair) { + return thrust::get<0>(pair) == invalid_bucket_idx; + }))); + bucket_indices.resize(this_bucket.size(), handle_ptr_->get_stream()); + this_bucket.shrink_to_fit(); + bucket_indices.shrink_to_fit(handle_ptr_->get_stream()); + + pair_first = + thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), this_bucket.begin())); + auto new_this_bucket_size = thrust::distance( + pair_first, + thrust::stable_partition( // stalbe_partition to maintain sorted order within each bucket + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + pair_first, + pair_first + bucket_indices.size(), + [this_bucket_idx = static_cast(this_bucket_idx)] __device__(auto pair) { + return thrust::get<0>(pair) == this_bucket_idx; + })); + + if (move_to_bucket_indices.size() == 1) { + get_bucket(move_to_bucket_indices[0]) + .insert(this_bucket.begin() + new_this_bucket_size, + thrust::distance(this_bucket.begin() + new_this_bucket_size, this_bucket.end())); + } else if (move_to_bucket_indices.size() == 2) { + auto next_bucket_size = thrust::distance( + pair_first + new_this_bucket_size, + thrust::stable_partition( // stalbe_partition to maintain sorted order within each bucket + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + pair_first + new_this_bucket_size, + pair_first + bucket_indices.size(), + [next_bucket_idx = static_cast(move_to_bucket_indices[0])] __device__( + auto pair) { return thrust::get<0>(pair) == next_bucket_idx; })); + get_bucket(move_to_bucket_indices[0]) + .insert(this_bucket.begin() + new_this_bucket_size, next_bucket_size); + 
get_bucket(move_to_bucket_indices[1]) + .insert(this_bucket.begin() + new_this_bucket_size + next_bucket_size, + thrust::distance(this_bucket.begin() + new_this_bucket_size + next_bucket_size, + this_bucket.end())); + } else { + thrust::sort(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + pair_first + new_this_bucket_size, + pair_first + bucket_indices.size()); + rmm::device_uvector d_indices(move_to_bucket_indices.size(), + handle_ptr_->get_stream()); + rmm::device_uvector d_counts(d_indices.size(), handle_ptr_->get_stream()); + auto it = thrust::reduce_by_key( + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + bucket_indices.begin() + new_this_bucket_size, + bucket_indices.end(), + thrust::make_constant_iterator(size_t{1}), + d_indices.begin(), + d_counts.begin()); + d_indices.resize(thrust::distance(d_indices.begin(), thrust::get<0>(it)), + handle_ptr_->get_stream()); + d_counts.resize(d_indices.size(), handle_ptr_->get_stream()); + std::vector h_indices(d_indices.size()); + std::vector h_counts(h_indices.size()); + raft::update_host( + h_indices.data(), d_indices.data(), d_indices.size(), handle_ptr_->get_stream()); + raft::update_host( + h_counts.data(), d_counts.data(), d_counts.size(), handle_ptr_->get_stream()); + handle_ptr_->get_stream_view().synchronize(); + std::vector h_offsets(h_indices.size(), 0); + std::partial_sum(h_counts.begin(), h_counts.end() - 1, h_offsets.begin() + 1); + for (size_t i = 0; i < h_indices.size(); ++i) { + if (h_counts[i] > 0) { + get_bucket(h_indices[i]) + .insert(this_bucket.begin() + new_this_bucket_size + h_offsets[i], h_counts[i]); + } + } + } - // FIXME: if we adopt CUDA cooperative group https://devblogs.nvidia.com/cooperative-groups - // and global sync(), we can merge this step with the above kernel (and rename the above kernel - // to move_if) - auto it = - thrust::remove_if(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - get_bucket(bucket_idx).begin(), - get_bucket(bucket_idx).end(), - [] __device__(auto value) { return value == invalid_vertex; }); - - auto bucket_sizes_device_ptr = std::get<1>(bucket_and_bucket_size_device_ptrs); - std::vector bucket_sizes(kNumBuckets); - raft::update_host( - bucket_sizes.data(), bucket_sizes_device_ptr, kNumBuckets, handle_ptr_->get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle_ptr_->get_stream())); - for (size_t i = 0; i < kNumBuckets; ++i) { - if (i != bucket_idx) { get_bucket(i).set_size(bucket_sizes[i]); } + this_bucket.resize(new_this_bucket_size); + this_bucket.shrink_to_fit(); } - auto size = thrust::distance(get_bucket(bucket_idx).begin(), it); - get_bucket(bucket_idx).set_size(size); - return; } - auto get_bucket_and_bucket_size_device_pointers() - { - std::vector tmp_ptrs(buckets_.size(), nullptr); - std::vector tmp_sizes(buckets_.size(), 0); - for (size_t i = 0; i < buckets_.size(); ++i) { - tmp_ptrs[i] = get_bucket(i).data(); - tmp_sizes[i] = get_bucket(i).size(); - } - raft::update_device( - tmp_bucket_ptrs_.data(), tmp_ptrs.data(), tmp_ptrs.size(), handle_ptr_->get_stream()); - raft::update_device( - tmp_bucket_sizes_.data(), tmp_sizes.data(), tmp_sizes.size(), handle_ptr_->get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle_ptr_->get_stream())); - return std::make_tuple(tmp_bucket_ptrs_.data(), tmp_bucket_sizes_.data()); - } - private: raft::handle_t const* handle_ptr_{nullptr}; - std::vector> buckets_{}; - rmm::device_uvector tmp_bucket_ptrs_; - rmm::device_uvector tmp_bucket_sizes_; + 
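  // Worked example for the generic (more than two target buckets) path of split_bucket above,
  // with assumed values: if the partitioned-off tail of bucket_indices is {1, 1, 1, 3, 3, 4},
  // thrust::reduce_by_key over it with a constant iterator of 1 yields h_indices = {1, 3, 4} and
  // h_counts = {3, 2, 1}; the partial_sum then gives h_offsets = {0, 3, 5}, so tail keys [0, 3)
  // are inserted into bucket 1, [3, 5) into bucket 3, and [5, 6) into bucket 4. The same
  // count/offset pattern is used for the multi-bucket case in update_frontier_v_push_if_out_nbr.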
std::vector> buckets_{}; }; } // namespace experimental diff --git a/cpp/include/utilities/dataframe_buffer.cuh b/cpp/include/utilities/dataframe_buffer.cuh index e59b12f2a80..b0e9c1ebfec 100644 --- a/cpp/include/utilities/dataframe_buffer.cuh +++ b/cpp/include/utilities/dataframe_buffer.cuh @@ -61,6 +61,21 @@ struct resize_dataframe_buffer_tuple_iterator_element_impl +struct shrink_to_fit_dataframe_buffer_tuple_iterator_element_impl { + void run(BufferType& buffer, cudaStream_t stream) + { + std::get(buffer).shrink_to_fit(stream); + shrink_to_fit_dataframe_buffer_tuple_iterator_element_impl() + .run(buffer, stream); + } +}; + +template +struct shrink_to_fit_dataframe_buffer_tuple_iterator_element_impl { + void run(BufferType& buffer, cudaStream_t stream) {} +}; + template auto get_dataframe_buffer_begin_tuple_element_impl(BufferType& buffer) { @@ -111,6 +126,27 @@ void resize_dataframe_buffer(BufferType& buffer, size_t new_buffer_size, cudaStr .run(buffer, new_buffer_size, stream); } +template ::value>* = nullptr> +void shrink_to_fit_dataframe_buffer(BufferType& buffer, cudaStream_t stream) +{ + buffer.shrink_to_fit(stream); +} + +template ::value>* = nullptr> +void shrink_to_fit_dataframe_buffer(BufferType& buffer, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + detail::shrink_to_fit_dataframe_buffer_tuple_iterator_element_impl() + .run(buffer, stream); +} + template ::value>* = nullptr> diff --git a/cpp/src/experimental/bfs.cu b/cpp/src/experimental/bfs.cu index 9145e3737b6..2a703c1c85e 100644 --- a/cpp/src/experimental/bfs.cu +++ b/cpp/src/experimental/bfs.cu @@ -90,11 +90,9 @@ void bfs(raft::handle_t const &handle, // 3. initialize BFS frontier - enum class Bucket { cur, num_buckets }; - std::vector bucket_sizes(static_cast(Bucket::num_buckets), - push_graph_view.get_number_of_local_vertices()); + enum class Bucket { cur, next, num_buckets }; VertexFrontier(Bucket::num_buckets)> - vertex_frontier(handle, bucket_sizes); + vertex_frontier(handle); if (push_graph_view.is_local_vertex_nocheck(source_vertex)) { vertex_frontier.get_bucket(static_cast(Bucket::cur)).insert(source_vertex); @@ -103,23 +101,18 @@ void bfs(raft::handle_t const &handle, // 4. BFS iteration vertex_t depth{0}; - auto cur_local_vertex_frontier_first = - vertex_frontier.get_bucket(static_cast(Bucket::cur)).begin(); - auto cur_vertex_frontier_aggregate_size = - vertex_frontier.get_bucket(static_cast(Bucket::cur)).aggregate_size(); while (true) { if (direction_optimizing) { CUGRAPH_FAIL("unimplemented."); } else { vertex_partition_device_t vertex_partition(push_graph_view); - auto cur_local_vertex_frontier_last = - vertex_frontier.get_bucket(static_cast(Bucket::cur)).end(); update_frontier_v_push_if_out_nbr( handle, push_graph_view, - cur_local_vertex_frontier_first, - cur_local_vertex_frontier_last, + vertex_frontier, + static_cast(Bucket::cur), + std::vector{static_cast(Bucket::next)}, thrust::make_constant_iterator(0) /* dummy */, thrust::make_constant_iterator(0) /* dummy */, [vertex_partition, distances] __device__( @@ -135,20 +128,19 @@ void bfs(raft::handle_t const &handle, reduce_op::any(), distances, thrust::make_zip_iterator(thrust::make_tuple(distances, predecessor_first)), - vertex_frontier, [depth] __device__(auto v_val, auto pushed_val) { - auto idx = (v_val == invalid_distance) ? static_cast(Bucket::cur) + auto idx = (v_val == invalid_distance) ? 
static_cast(Bucket::next) : VertexFrontier::kInvalidBucketIdx; return thrust::make_tuple(idx, thrust::make_tuple(depth + 1, pushed_val)); }); - auto new_vertex_frontier_aggregate_size = - vertex_frontier.get_bucket(static_cast(Bucket::cur)).aggregate_size() - - cur_vertex_frontier_aggregate_size; - if (new_vertex_frontier_aggregate_size == 0) { break; } - - cur_local_vertex_frontier_first = cur_local_vertex_frontier_last; - cur_vertex_frontier_aggregate_size += new_vertex_frontier_aggregate_size; + vertex_frontier.get_bucket(static_cast(Bucket::cur)).clear(); + vertex_frontier.get_bucket(static_cast(Bucket::cur)).shrink_to_fit(); + vertex_frontier.swap_buckets(static_cast(Bucket::cur), + static_cast(Bucket::next)); + if (vertex_frontier.get_bucket(static_cast(Bucket::cur)).aggregate_size() == 0) { + break; + } } depth++; diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 47c41cb3426..18db57a737f 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -295,8 +295,8 @@ graph_t::max())); rmm::device_uvector d_thresholds(detail::num_segments_per_vertex_partition - 1, default_stream); - std::vector h_thresholds = {static_cast(detail::low_degree_threshold), - static_cast(detail::mid_degree_threshold)}; + std::vector h_thresholds = {static_cast(detail::mid_degree_threshold), + static_cast(detail::low_degree_threshold)}; raft::update_device( d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), default_stream); @@ -317,7 +317,8 @@ graph_t{}); rmm::device_uvector aggregate_segment_offsets(col_comm_size * segment_offsets.size(), default_stream); @@ -326,8 +327,8 @@ graph_t::max())); rmm::device_uvector d_thresholds(detail::num_segments_per_vertex_partition - 1, default_stream); - std::vector h_thresholds = {static_cast(detail::low_degree_threshold), - static_cast(detail::mid_degree_threshold)}; + std::vector h_thresholds = {static_cast(detail::mid_degree_threshold), + static_cast(detail::low_degree_threshold)}; raft::update_device( d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), default_stream); @@ -462,7 +463,8 @@ graph_tget_number_of_vertices(), d_thresholds.begin(), d_thresholds.end(), - segment_offsets.begin() + 1); + segment_offsets.begin() + 1, + thrust::greater{}); segment_offsets_.resize(segment_offsets.size()); raft::update_host( diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index c6f39a44333..67603ae260b 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -161,7 +161,7 @@ graph_view_t const& adj_matrix_partition_offsets, std::vector const& adj_matrix_partition_indices, std::vector const& adj_matrix_partition_weights, - std::vector const& vertex_partition_segment_offsets, + std::vector const& adj_matrix_partition_segment_offsets, partition_t const& partition, vertex_t number_of_vertices, edge_t number_of_edges, @@ -176,7 +176,7 @@ graph_view_t bucket_sizes(static_cast(Bucket::num_buckets), - push_graph_view.get_number_of_local_vertices()); + enum class Bucket { cur_near, next_near, far, num_buckets }; VertexFrontier(Bucket::num_buckets)> - vertex_frontier(handle, bucket_sizes); + vertex_frontier(handle); // 5. 
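// A host-side analogue of the graph.cu threshold change above; the values 128 and 32 are
// stand-ins for detail::mid_degree_threshold and detail::low_degree_threshold and are
// assumptions for illustration. Once vertices are renumbered so degrees are non-increasing,
// the degree array is sorted descending, hence the swapped threshold order and the
// thrust::greater comparator.
#include <algorithm>
#include <cstdio>
#include <functional>
#include <vector>

int main()
{
  std::vector<int> degrees{500, 200, 130, 100, 40, 20, 3};  // non-increasing degrees
  std::vector<int> thresholds{128, 32};  // {mid_degree_threshold, low_degree_threshold}
  for (int t : thresholds) {
    auto pos = std::lower_bound(degrees.begin(), degrees.end(), t, std::greater<int>{});
    std::printf("threshold %d -> segment offset %td\n", t, pos - degrees.begin());
  }
  // prints offsets 3 and 5: segments [0, 3) high, [3, 5) mid, and [5, 7) low degree,
  // matching what the device-side thrust::lower_bound records in segment_offsets_.
  return 0;
}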
SSSP iteration @@ -172,8 +169,9 @@ void sssp(raft::handle_t const &handle, update_frontier_v_push_if_out_nbr( handle, push_graph_view, - vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).begin(), - vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).end(), + vertex_frontier, + static_cast(Bucket::cur_near), + std::vector{static_cast(Bucket::next_near), static_cast(Bucket::far)}, row_distances, thrust::make_constant_iterator(0) /* dummy */, [vertex_partition, distances, cutoff] __device__( @@ -193,30 +191,31 @@ void sssp(raft::handle_t const &handle, reduce_op::min>(), distances, thrust::make_zip_iterator(thrust::make_tuple(distances, predecessor_first)), - vertex_frontier, [near_far_threshold] __device__(auto v_val, auto pushed_val) { auto new_dist = thrust::get<0>(pushed_val); auto idx = new_dist < v_val - ? (new_dist < near_far_threshold ? static_cast(Bucket::new_near) + ? (new_dist < near_far_threshold ? static_cast(Bucket::next_near) : static_cast(Bucket::far)) : VertexFrontier::kInvalidBucketIdx; return thrust::make_tuple(idx, pushed_val); }); vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).clear(); - if (vertex_frontier.get_bucket(static_cast(Bucket::new_near)).aggregate_size() > 0) { + vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).shrink_to_fit(); + if (vertex_frontier.get_bucket(static_cast(Bucket::next_near)).aggregate_size() > 0) { vertex_frontier.swap_buckets(static_cast(Bucket::cur_near), - static_cast(Bucket::new_near)); + static_cast(Bucket::next_near)); } else if (vertex_frontier.get_bucket(static_cast(Bucket::far)).aggregate_size() > 0) { // near queue is empty, split the far queue auto old_near_far_threshold = near_far_threshold; near_far_threshold += delta; - size_t new_near_size{0}; - size_t new_far_size{0}; + size_t near_size{0}; + size_t far_size{0}; while (true) { vertex_frontier.split_bucket( static_cast(Bucket::far), + std::vector{static_cast(Bucket::cur_near)}, [vertex_partition, distances, old_near_far_threshold, near_far_threshold] __device__( auto v) { auto dist = @@ -229,17 +228,16 @@ void sssp(raft::handle_t const &handle, return static_cast(Bucket::far); } }); - new_near_size = + near_size = vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).aggregate_size(); - new_far_size = - vertex_frontier.get_bucket(static_cast(Bucket::far)).aggregate_size(); - if ((new_near_size > 0) || (new_far_size == 0)) { + far_size = vertex_frontier.get_bucket(static_cast(Bucket::far)).aggregate_size(); + if ((near_size > 0) || (far_size == 0)) { break; } else { near_far_threshold += delta; } } - if ((new_near_size == 0) && (new_far_size == 0)) { break; } + if ((near_size == 0) && (far_size == 0)) { break; } } else { break; } diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp index 8fce9488d8a..ded57dd1855 100644 --- a/cpp/tests/experimental/bfs_test.cpp +++ b/cpp/tests/experimental/bfs_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include #include @@ -153,11 +154,22 @@ class Tests_BFS : public ::testing::TestWithParam { using weight_t = float; raft::handle_t handle{}; + HighResClock hr_clock{}; + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::graph_t graph(handle); rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); std::tie(graph, d_renumber_map_labels) = read_graph(handle, configuration, renumber); + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "read_graph took " << elapsed_time * 1e-6 << " s.\n"; + } auto graph_view = graph.view(); ASSERT_TRUE(static_cast(configuration.source) >= 0 && @@ -169,7 +181,10 @@ class Tests_BFS : public ::testing::TestWithParam { rmm::device_uvector d_predecessors(graph_view.get_number_of_vertices(), handle.get_stream()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::bfs(handle, graph_view, @@ -179,7 +194,12 @@ class Tests_BFS : public ::testing::TestWithParam { false, std::numeric_limits::max()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "BFS took " << elapsed_time * 1e-6 << " s.\n"; + } if (configuration.check_correctness) { cugraph::experimental::graph_t unrenumbered_graph( diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index 71011f3d018..c7756699acd 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -171,11 +172,22 @@ class Tests_KatzCentrality : public ::testing::TestWithParam graph(handle); rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); std::tie(graph, d_renumber_map_labels) = read_graph(handle, configuration, renumber); + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "read_graph took " << elapsed_time * 1e-6 << " s.\n"; + } auto graph_view = graph.view(); auto degrees = graph_view.compute_in_degrees(handle); @@ -191,7 +203,10 @@ class Tests_KatzCentrality : public ::testing::TestWithParam d_katz_centralities(graph_view.get_number_of_vertices(), handle.get_stream()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::katz_centrality(handle, graph_view, @@ -204,7 +219,12 @@ class Tests_KatzCentrality : public ::testing::TestWithParam unrenumbered_graph( diff --git a/cpp/tests/experimental/mg_bfs_test.cpp b/cpp/tests/experimental/mg_bfs_test.cpp index 76ccb5d9de3..64ffedd2492 100644 --- a/cpp/tests/experimental/mg_bfs_test.cpp +++ b/cpp/tests/experimental/mg_bfs_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include #include @@ -34,6 +35,11 @@ #include +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + typedef struct BFS_Usecase_t { cugraph::test::input_graph_specifier_t input_graph_specifier{}; @@ -117,6 +123,7 @@ class Tests_MGBFS : public ::testing::TestWithParam { // 1. initialize handle raft::handle_t handle{}; + HighResClock hr_clock{}; raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); auto& comm = handle.get_comms(); @@ -130,10 +137,20 @@ class Tests_MGBFS : public ::testing::TestWithParam { // 2. create MG graph + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::graph_t mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = read_graph(handle, configuration, true); + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG read_graph took " << elapsed_time * 1e-6 << " s.\n"; + } auto mg_graph_view = mg_graph.view(); @@ -149,7 +166,10 @@ class Tests_MGBFS : public ::testing::TestWithParam { rmm::device_uvector d_mg_predecessors(mg_graph_view.get_number_of_local_vertices(), handle.get_stream()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::bfs(handle, mg_graph_view, @@ -157,10 +177,14 @@ class Tests_MGBFS : public ::testing::TestWithParam { d_mg_predecessors.data(), static_cast(configuration.source), false, - std::numeric_limits::max(), - true); + std::numeric_limits::max()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG BFS took " << elapsed_time * 1e-6 << " s.\n"; + } // 5. copmare SG & MG results @@ -204,8 +228,7 @@ class Tests_MGBFS : public ::testing::TestWithParam { d_sg_predecessors.data(), unrenumbered_source, false, - std::numeric_limits::max(), - true); + std::numeric_limits::max()); // 5-3. compare diff --git a/cpp/tests/experimental/mg_katz_centrality_test.cpp b/cpp/tests/experimental/mg_katz_centrality_test.cpp index e3033af3771..937bd33472b 100644 --- a/cpp/tests/experimental/mg_katz_centrality_test.cpp +++ b/cpp/tests/experimental/mg_katz_centrality_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include #include @@ -31,6 +32,11 @@ #include +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + typedef struct KatzCentrality_Usecase_t { cugraph::test::input_graph_specifier_t input_graph_specifier{}; @@ -117,6 +123,7 @@ class Tests_MGKatzCentrality : public ::testing::TestWithParam mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = read_graph(handle, configuration, true); + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG read_graph took " << elapsed_time * 1e-6 << " s.\n"; + } auto mg_graph_view = mg_graph.view(); @@ -150,7 +167,10 @@ class Tests_MGKatzCentrality : public ::testing::TestWithParam d_mg_katz_centralities( mg_graph_view.get_number_of_local_vertices(), handle.get_stream()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::katz_centrality(handle, mg_graph_view, @@ -160,10 +180,14 @@ class Tests_MGKatzCentrality : public ::testing::TestWithParam::max(), - false, - true); + false); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG Katz Centrality took " << elapsed_time * 1e-6 << " s.\n"; + } // 5. copmare SG & MG results @@ -189,8 +213,7 @@ class Tests_MGKatzCentrality : public ::testing::TestWithParam::max(), // max_iterations - false, - true); + false); // 5-4. compare diff --git a/cpp/tests/experimental/mg_sssp_test.cpp b/cpp/tests/experimental/mg_sssp_test.cpp index 48e4dc869f4..de39b8da128 100644 --- a/cpp/tests/experimental/mg_sssp_test.cpp +++ b/cpp/tests/experimental/mg_sssp_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -34,6 +35,11 @@ #include +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + typedef struct SSSP_Usecase_t { cugraph::test::input_graph_specifier_t input_graph_specifier{}; @@ -115,6 +121,7 @@ class Tests_MGSSSP : public ::testing::TestWithParam { // 1. initialize handle raft::handle_t handle{}; + HighResClock hr_clock{}; raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); auto& comm = handle.get_comms(); @@ -128,10 +135,20 @@ class Tests_MGSSSP : public ::testing::TestWithParam { // 2. 
create MG graph + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::graph_t mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = read_graph(handle, configuration, true); + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG read_graph took " << elapsed_time * 1e-6 << " s.\n"; + } auto mg_graph_view = mg_graph.view(); @@ -147,7 +164,10 @@ class Tests_MGSSSP : public ::testing::TestWithParam { rmm::device_uvector d_mg_predecessors(mg_graph_view.get_number_of_local_vertices(), handle.get_stream()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } // FIXME: disable do_expensive_check cugraph::experimental::sssp(handle, @@ -155,10 +175,14 @@ class Tests_MGSSSP : public ::testing::TestWithParam { d_mg_distances.data(), d_mg_predecessors.data(), static_cast(configuration.source), - std::numeric_limits::max(), - true); + std::numeric_limits::max()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG SSSP took " << elapsed_time * 1e-6 << " s.\n"; + } // 5. copmare SG & MG results @@ -202,8 +226,7 @@ class Tests_MGSSSP : public ::testing::TestWithParam { d_sg_distances.data(), d_sg_predecessors.data(), unrenumbered_source, - std::numeric_limits::max(), - true); + std::numeric_limits::max()); // 5-3. compare diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 649fe11d805..0340140d14b 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include #include @@ -213,11 +214,22 @@ class Tests_PageRank : public ::testing::TestWithParam { constexpr bool renumber = true; raft::handle_t handle{}; + HighResClock hr_clock{}; + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::graph_t graph(handle); rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); std::tie(graph, d_renumber_map_labels) = read_graph(handle, configuration, renumber); + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "read_graph took " << elapsed_time * 1e-6 << " s.\n"; + } auto graph_view = graph.view(); std::vector h_personalization_vertices{}; @@ -271,7 +283,10 @@ class Tests_PageRank : public ::testing::TestWithParam { rmm::device_uvector d_pageranks(graph_view.get_number_of_vertices(), handle.get_stream()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::pagerank(handle, graph_view, @@ -286,7 +301,12 @@ class Tests_PageRank : public ::testing::TestWithParam { false, false); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "PageRank took " << elapsed_time * 1e-6 << " s.\n"; + } if (configuration.check_correctness) { cugraph::experimental::graph_t unrenumbered_graph( diff --git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp index 9364d261dec..e8ab3ec5426 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include #include @@ -157,11 +158,22 @@ class Tests_SSSP : public ::testing::TestWithParam { constexpr bool renumber = true; raft::handle_t handle{}; + HighResClock hr_clock{}; + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::graph_t graph(handle); rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); std::tie(graph, d_renumber_map_labels) = read_graph(handle, configuration, renumber); + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "read_graph took " << elapsed_time * 1e-6 << " s.\n"; + } auto graph_view = graph.view(); ASSERT_TRUE(static_cast(configuration.source) >= 0 && @@ -172,7 +184,10 @@ class Tests_SSSP : public ::testing::TestWithParam { rmm::device_uvector d_predecessors(graph_view.get_number_of_vertices(), handle.get_stream()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::sssp(handle, graph_view, @@ -182,7 +197,12 @@ class Tests_SSSP : public ::testing::TestWithParam { std::numeric_limits::max(), false); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "SSSP took " << elapsed_time * 1e-6 << " s.\n"; + } if (configuration.check_correctness) { cugraph::experimental::graph_t unrenumbered_graph( @@ -323,7 +343,7 @@ INSTANTIATE_TEST_CASE_P( SSSP_Usecase("test/datasets/wiki2003.mtx", 1000), SSSP_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0), // disable correctness checks for large graphs - SSSP_Usecase(cugraph::test::rmat_params_t{20, 16, 0.57, 0.19, 0.19, 0, false, false}, + SSSP_Usecase(cugraph::test::rmat_params_t{20, 32, 0.57, 0.19, 0.19, 0, false, false}, 0, false))); diff --git a/cpp/tests/pagerank/mg_pagerank_test.cpp b/cpp/tests/pagerank/mg_pagerank_test.cpp index f7b1e8dfbb4..bbc80a60a3d 100644 --- a/cpp/tests/pagerank/mg_pagerank_test.cpp +++ b/cpp/tests/pagerank/mg_pagerank_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -34,6 +35,11 @@ #include +// do the perf measurements +// enabled by command line parameter s'--perf' +// +static int PERF = 0; + typedef struct PageRank_Usecase_t { cugraph::test::input_graph_specifier_t input_graph_specifier{}; @@ -127,6 +133,7 @@ class Tests_MGPageRank : public ::testing::TestWithParam { // 1. initialize handle raft::handle_t handle{}; + HighResClock hr_clock{}; raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); auto& comm = handle.get_comms(); @@ -140,10 +147,20 @@ class Tests_MGPageRank : public ::testing::TestWithParam { // 2. 
create MG graph + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::graph_t mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = read_graph(handle, configuration, true); + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG read_graph took " << elapsed_time * 1e-6 << " s.\n"; + } auto mg_graph_view = mg_graph.view(); @@ -195,7 +212,10 @@ class Tests_MGPageRank : public ::testing::TestWithParam { rmm::device_uvector d_mg_pageranks(mg_graph_view.get_number_of_local_vertices(), handle.get_stream()); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } cugraph::experimental::pagerank(handle, mg_graph_view, @@ -209,7 +229,12 @@ class Tests_MGPageRank : public ::testing::TestWithParam { std::numeric_limits::max(), false); - CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG PageRank took " << elapsed_time * 1e-6 << " s.\n"; + } // 5. copmare SG & MG results diff --git a/cpp/tests/utilities/generate_graph_from_edgelist.cu b/cpp/tests/utilities/generate_graph_from_edgelist.cu index a9df392d2fb..5f41e0e5ce0 100644 --- a/cpp/tests/utilities/generate_graph_from_edgelist.cu +++ b/cpp/tests/utilities/generate_graph_from_edgelist.cu @@ -109,7 +109,6 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, (store_transposed ? edgelist_rows.begin() : edgelist_cols.begin()) + h_displacements[i]; counts[i] = static_cast(h_edge_counts[i]); } - // FIXME: set do_expensive_check to false once validated std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = cugraph::experimental::renumber_edgelist( handle, @@ -117,8 +116,7 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, static_cast(vertices.size()), major_ptrs, minor_ptrs, - counts, - true); + counts); } // 4. create a graph @@ -142,7 +140,6 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, number_of_vertices, number_of_edges, cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, - true, true), std::move(renumber_map_labels)); } @@ -168,7 +165,6 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, { vertex_t number_of_vertices = static_cast(vertices.size()); - // FIXME: set do_expensive_check to false once validated auto renumber_map_labels = renumber ? cugraph::experimental::renumber_edgelist( handle, @@ -176,11 +172,9 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, static_cast(vertices.size()), store_transposed ? edgelist_cols.data() : edgelist_rows.data(), store_transposed ? 
edgelist_rows.data() : edgelist_cols.data(), - static_cast(edgelist_rows.size()), - true) + static_cast(edgelist_rows.size())) : rmm::device_uvector(0, handle.get_stream()); - // FIXME: set do_expensive_check to false once validated return std::make_tuple( cugraph::experimental::graph_t( handle, @@ -191,8 +185,7 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, static_cast(edgelist_rows.size())}, number_of_vertices, cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, - renumber ? true : false, - true), + renumber ? true : false), std::move(renumber_map_labels)); } From 79c3ba059d2cc4f2bfafece0e92671013a584175 Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Wed, 7 Apr 2021 17:05:09 +0200 Subject: [PATCH 47/51] Add utility function for computing a secondary cost for BFS and SSSP output (#1376) Solves: https://github.com/rapidsai/cugraph/issues/1373 Authors: - Hugo Linsenmaier (https://github.com/hlinsen) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Andrei Schaffer (https://github.com/aschaffer) - Alex Fender (https://github.com/afender) URL: https://github.com/rapidsai/cugraph/pull/1376 --- cpp/CMakeLists.txt | 1 + cpp/include/utilities/path_retrieval.hpp | 45 ++++++ cpp/src/utilities/path_retrieval.cu | 133 ++++++++++++++++++ python/cugraph/tests/test_utils.py | 28 ++++ python/cugraph/utilities/__init__.py | 3 +- python/cugraph/utilities/path_retrieval.pxd | 30 ++++ python/cugraph/utilities/path_retrieval.py | 100 +++++++++++++ .../utilities/path_retrieval_wrapper.pyx | 72 ++++++++++ 8 files changed, 411 insertions(+), 1 deletion(-) create mode 100644 cpp/include/utilities/path_retrieval.hpp create mode 100644 cpp/src/utilities/path_retrieval.cu create mode 100644 python/cugraph/utilities/path_retrieval.pxd create mode 100644 python/cugraph/utilities/path_retrieval.py create mode 100644 python/cugraph/utilities/path_retrieval_wrapper.pyx diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5a3cb65caa5..0388a76d729 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -402,6 +402,7 @@ endif(BUILD_STATIC_FAISS) add_library(cugraph SHARED src/utilities/spmv_1D.cu src/utilities/cython.cu + src/utilities/path_retrieval.cu src/structure/graph.cu src/linear_assignment/hungarian.cu src/link_analysis/gunrock_hits.cpp diff --git a/cpp/include/utilities/path_retrieval.hpp b/cpp/include/utilities/path_retrieval.hpp new file mode 100644 index 00000000000..e626d6af1ab --- /dev/null +++ b/cpp/include/utilities/path_retrieval.hpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cugraph { + +/** + * @brief Takes the results of BFS or SSSP function call and sums the given + * weights along the path to the starting vertex. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. 
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. Must have at least one worker stream. + * @param vertices Pointer to vertex ids. + * @param preds Pointer to predecessors. + * @param info_weights Secondary weights along the edge from predecessor to vertex. + * @param out Contains for each index the sum of weights along the path unfolding. + * @param num_vertices Number of vertices. + **/ +template +void get_traversed_cost(raft::handle_t const &handle, + vertex_t const *vertices, + vertex_t const *preds, + weight_t const *info_weights, + weight_t *out, + vertex_t stop_vertex, + vertex_t num_vertices); +} // namespace cugraph diff --git a/cpp/src/utilities/path_retrieval.cu b/cpp/src/utilities/path_retrieval.cu new file mode 100644 index 00000000000..93ead5898f8 --- /dev/null +++ b/cpp/src/utilities/path_retrieval.cu @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +#include +#include + +namespace cugraph { +namespace detail { + +template +__global__ void get_traversed_cost_kernel(vertex_t const *vertices, + vertex_t const *preds, + vertex_t const *vtx_map, + weight_t const *info_weights, + weight_t *out, + vertex_t stop_vertex, + vertex_t num_vertices) +{ + for (vertex_t i = threadIdx.x + blockIdx.x * blockDim.x; i < num_vertices; + i += gridDim.x * blockDim.x) { + weight_t sum = info_weights[i]; + vertex_t pred = preds[i]; + while (pred != stop_vertex) { + vertex_t pos = vtx_map[pred]; + sum += info_weights[pos]; + pred = preds[pos]; + } + out[i] = sum; + } +} + +template +void get_traversed_cost_impl(raft::handle_t const &handle, + vertex_t const *vertices, + vertex_t const *preds, + weight_t const *info_weights, + weight_t *out, + vertex_t stop_vertex, + vertex_t num_vertices) +{ + auto stream = handle.get_stream(); + vertex_t max_blocks = handle.get_device_properties().maxGridSize[0]; + vertex_t max_threads = handle.get_device_properties().maxThreadsPerBlock; + + dim3 nthreads, nblocks; + nthreads.x = std::min(num_vertices, max_threads); + nthreads.y = 1; + nthreads.z = 1; + nblocks.x = std::min((num_vertices + nthreads.x - 1) / nthreads.x, max_blocks); + nblocks.y = 1; + nblocks.z = 1; + + rmm::device_uvector vtx_map_v(num_vertices, stream); + rmm::device_uvector vtx_keys_v(num_vertices, stream); + vertex_t *vtx_map = vtx_map_v.data(); + vertex_t *vtx_keys = vtx_keys_v.data(); + raft::copy(vtx_keys, vertices, num_vertices, stream); + + thrust::sequence(rmm::exec_policy(stream)->on(stream), vtx_map, vtx_map + num_vertices); + + thrust::stable_sort_by_key( + rmm::exec_policy(stream)->on(stream), vtx_keys, vtx_keys + num_vertices, vtx_map); + + get_traversed_cost_kernel<<>>( + vertices, preds, vtx_map, info_weights, out, stop_vertex, num_vertices); +} +} // namespace detail + +template +void get_traversed_cost(raft::handle_t const &handle, + vertex_t const 
*vertices, + vertex_t const *preds, + weight_t const *info_weights, + weight_t *out, + vertex_t stop_vertex, + vertex_t num_vertices) +{ + CUGRAPH_EXPECTS(num_vertices > 0, "num_vertices should be strictly positive"); + CUGRAPH_EXPECTS(out != nullptr, "out should be of size num_vertices"); + cugraph::detail::get_traversed_cost_impl( + handle, vertices, preds, info_weights, out, stop_vertex, num_vertices); +} + +template void get_traversed_cost(raft::handle_t const &handle, + int32_t const *vertices, + int32_t const *preds, + float const *info_weights, + float *out, + int32_t stop_vertex, + int32_t num_vertices); + +template void get_traversed_cost(raft::handle_t const &handle, + int32_t const *vertices, + int32_t const *preds, + double const *info_weights, + double *out, + int32_t stop_vertex, + int32_t num_vertices); + +template void get_traversed_cost(raft::handle_t const &handle, + int64_t const *vertices, + int64_t const *preds, + float const *info_weights, + float *out, + int64_t stop_vertex, + int64_t num_vertices); + +template void get_traversed_cost(raft::handle_t const &handle, + int64_t const *vertices, + int64_t const *preds, + double const *info_weights, + double *out, + int64_t stop_vertex, + int64_t num_vertices); +} // namespace cugraph diff --git a/python/cugraph/tests/test_utils.py b/python/cugraph/tests/test_utils.py index 2ca820271c0..55256d6b74e 100644 --- a/python/cugraph/tests/test_utils.py +++ b/python/cugraph/tests/test_utils.py @@ -17,7 +17,9 @@ import pytest import cugraph +import cudf from cugraph.tests import utils +import numpy as np def test_bfs_paths(): @@ -68,3 +70,29 @@ def test_bfs_paths_array(): answer = cugraph.utils.get_traversed_path_list(df, 100) assert "not in the result set" in str(ErrorMsg) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_get_traversed_cost(graph_file): + cu_M = utils.read_csv_file(graph_file) + + noise = cudf.Series(np.random.randint(10, size=(cu_M.shape[0]))) + cu_M['info'] = cu_M['2'] + noise + + G = cugraph.Graph() + G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='info') + + # run SSSP starting at vertex 17 + df = cugraph.sssp(G, 16) + + answer = cugraph.utilities.path_retrieval.get_traversed_cost(df, 16, + cu_M['0'], + cu_M['1'], + cu_M['info'] + ) + + df = df.sort_values(by='vertex').reset_index() + answer = answer.sort_values(by='vertex').reset_index() + + assert df.shape[0] == answer.shape[0] + assert np.allclose(df['distance'], answer['info']) diff --git a/python/cugraph/utilities/__init__.py b/python/cugraph/utilities/__init__.py index 61f5596eee6..38b46b0fe87 100644 --- a/python/cugraph/utilities/__init__.py +++ b/python/cugraph/utilities/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -25,3 +25,4 @@ is_cp_matrix_type, is_sp_matrix_type, ) +from cugraph.utilities.path_retrieval import get_traversed_cost diff --git a/python/cugraph/utilities/path_retrieval.pxd b/python/cugraph/utilities/path_retrieval.pxd new file mode 100644 index 00000000000..88f1da8f213 --- /dev/null +++ b/python/cugraph/utilities/path_retrieval.pxd @@ -0,0 +1,30 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
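
The CUDA kernel above follows each vertex's predecessor chain and accumulates the secondary weight until it reaches `stop_vertex`. The sketch below is a minimal, single-threaded Python reference of that same accumulation; the function name and the NumPy inputs are illustrative and not part of the patch.

    import numpy as np

    def traversed_cost_reference(vertices, preds, info_weights, stop_vertex):
        # Position of each vertex id in the input arrays; this plays the role
        # of the sorted vtx_map built on the device.
        pos = {int(v): i for i, v in enumerate(vertices)}
        out = np.empty(len(vertices), dtype=info_weights.dtype)
        for i in range(len(vertices)):
            total = info_weights[i]
            pred = preds[i]
            while pred != stop_vertex:
                j = pos[int(pred)]          # index of the predecessor
                total += info_weights[j]    # add its secondary weight
                pred = preds[j]
            out[i] = total
        return out
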
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +from cugraph.structure.graph_primtypes cimport * + +cdef extern from "utilities/path_retrieval.hpp" namespace "cugraph": + + cdef void get_traversed_cost[vertex_t, weight_t](const handle_t &handle, + const vertex_t *vertices, + const vertex_t *preds, + const weight_t *info_weights, + weight_t *out, + vertex_t stop_vertex, + vertex_t num_vertices) except + + diff --git a/python/cugraph/utilities/path_retrieval.py b/python/cugraph/utilities/path_retrieval.py new file mode 100644 index 00000000000..b9baadc2f21 --- /dev/null +++ b/python/cugraph/utilities/path_retrieval.py @@ -0,0 +1,100 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import cudf + +from cugraph.structure.symmetrize import symmetrize +from cugraph.structure.number_map import NumberMap +from cugraph.utilities import path_retrieval_wrapper + + +def get_traversed_cost(df, source, source_col, dest_col, value_col): + """ + Take the DataFrame result from a BFS or SSSP function call and sums + the given weights along the path to the starting vertex. + The source_col, dest_col identifiers need to match with the vertex and + predecessor columns of df. + + Input Parameters + ---------- + df : cudf.DataFrame + The dataframe containing the results of a BFS or SSSP call + source: int + Index of the source vertex. + source_col : cudf.DataFrame + This cudf.Series wraps a gdf_column of size E (E: number of edges). + The gdf column contains the source index for each edge. + Source indices must be an integer type. + dest_col : cudf.Series + This cudf.Series wraps a gdf_column of size E (E: number of edges). + The gdf column contains the destination index for each edge. + Destination indices must be an integer type. + value_col : cudf.Series + This cudf.Series wraps a gdf_column of size E (E: number of edges). + The gdf column contains values associated with this edge. + Weight should be a floating type. + + Returns + --------- + df : cudf.DataFrame + DataFrame containing two columns 'vertex' and 'info'. + Unreachable vertices will have value the max value of the weight type. 
+ """ + + if 'vertex' not in df.columns: + raise ValueError("DataFrame does not appear to be a BFS or " + "SSP result - 'vertex' column missing") + if 'distance' not in df.columns: + raise ValueError("DataFrame does not appear to be a BFS or " + "SSP result - 'distance' column missing") + if 'predecessor' not in df.columns: + raise ValueError("DataFrame does not appear to be a BFS or " + "SSP result - 'predecessor' column missing") + + src, dst, val = symmetrize(source_col, + dest_col, + value_col) + + symmetrized_df = cudf.DataFrame() + symmetrized_df['source'] = src + symmetrized_df['destination'] = dst + symmetrized_df['weights'] = val + + input_df = df.merge(symmetrized_df, + left_on=['vertex', 'predecessor'], + right_on=['source', 'destination'], + how="left" + ) + + # Set unreachable vertex weights to max float and source vertex weight to 0 + max_val = np.finfo(val.dtype).max + input_df[['weights']] = input_df[['weights']].fillna(max_val) + input_df.loc[input_df['vertex'] == source, 'weights'] = 0 + + # Renumber + renumbered_gdf, renumber_map = NumberMap.renumber(input_df, + ["vertex"], + ["predecessor"], + preserve_order=True) + renumbered_gdf = renumbered_gdf.rename(columns={'src': 'vertex', + 'dst': 'predecessor'}) + stop_vertex = renumber_map.to_internal_vertex_id(cudf.Series(-1)).values[0] + + out_df = path_retrieval_wrapper.get_traversed_cost(renumbered_gdf, + stop_vertex) + + # Unrenumber + out_df['vertex'] = renumber_map.unrenumber(renumbered_gdf, 'vertex', + preserve_order=True)["vertex"] + return out_df diff --git a/python/cugraph/utilities/path_retrieval_wrapper.pyx b/python/cugraph/utilities/path_retrieval_wrapper.pyx new file mode 100644 index 00000000000..98d11ad07df --- /dev/null +++ b/python/cugraph/utilities/path_retrieval_wrapper.pyx @@ -0,0 +1,72 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
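
A minimal end-to-end sketch of how this utility is meant to be called, mirroring the new `test_get_traversed_cost` above; the toy edge list and the secondary `info` values are illustrative.

    import cudf
    import cugraph
    from cugraph.utilities import get_traversed_cost

    # Small weighted edge list; 'info' is the secondary cost to accumulate
    # along the shortest paths computed on 'weight'.
    edges = cudf.DataFrame({
        "src":    [0, 0, 1, 2],
        "dst":    [1, 2, 3, 3],
        "weight": [1.0, 4.0, 2.0, 1.0],
        "info":   [0.5, 0.25, 1.0, 2.0],
    })

    G = cugraph.Graph()
    G.from_cudf_edgelist(edges, source="src", destination="dst",
                         edge_attr="weight")

    # SSSP from vertex 0 returns columns: vertex, distance, predecessor
    df = cugraph.sssp(G, 0)

    # Sum 'info' along each path back to the source; the result has columns
    # 'vertex' and 'info' (unreachable vertices get the max of the weight type).
    cost = get_traversed_cost(df, 0, edges["src"], edges["dst"], edges["info"])
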
+ +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +from cugraph.utilities.path_retrieval cimport get_traversed_cost as c_get_traversed_cost +from cugraph.structure.graph_primtypes cimport * +from libc.stdint cimport uintptr_t +from numba import cuda +import cudf +import numpy as np + + +def get_traversed_cost(input_df, stop_vertex): + """ + Call get_traversed_cost + """ + num_verts = input_df.shape[0] + vertex_t = input_df.vertex.dtype + weight_t = input_df.weights.dtype + + df = cudf.DataFrame() + df['vertex'] = input_df['vertex'] + df['info'] = cudf.Series(np.zeros(num_verts, dtype=weight_t)) + + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get(); + + cdef uintptr_t vertices = NULL + cdef uintptr_t preds = NULL + cdef uintptr_t out = NULL + cdef uintptr_t info_weights = NULL + + vertices = input_df['vertex'].__cuda_array_interface__['data'][0] + preds = input_df['predecessor'].__cuda_array_interface__['data'][0] + info_weights = input_df['weights'].__cuda_array_interface__['data'][0] + out = df['info'].__cuda_array_interface__['data'][0] + + if weight_t == np.float32: + c_get_traversed_cost(handle_[0], + vertices, + preds, + info_weights, + out, + stop_vertex, + num_verts) + elif weight_t == np.float64: + c_get_traversed_cost(handle_[0], + vertices, + preds, + info_weights, + out, + stop_vertex, + num_verts) + else: + raise NotImplementedError + + return df From 63e69fcf32742fdee7e14267ba6accd94fd19c4c Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Wed, 7 Apr 2021 18:51:32 -0500 Subject: [PATCH 48/51] Random Walks - Python Bindings (#1516) Python bindings for random walks closes #1488 check the rendering after the PR is merged to make sure everything render as expected Authors: - Joseph Nke (https://github.com/jnke2016) Approvers: - Brad Rees (https://github.com/BradReesWork) - Andrei Schaffer (https://github.com/aschaffer) - Alex Fender (https://github.com/afender) URL: https://github.com/rapidsai/cugraph/pull/1516 --- README.md | 1 + docs/source/api.rst | 11 ++ python/cugraph/__init__.py | 2 + python/cugraph/sampling/__init__.py | 14 ++ python/cugraph/sampling/random_walks.pxd | 22 +++ python/cugraph/sampling/random_walks.py | 95 +++++++++++ .../cugraph/sampling/random_walks_wrapper.pyx | 116 +++++++++++++ python/cugraph/structure/graph_utilities.pxd | 9 + python/cugraph/tests/test_random_walks.py | 154 ++++++++++++++++++ 9 files changed, 424 insertions(+) create mode 100644 python/cugraph/sampling/__init__.py create mode 100644 python/cugraph/sampling/random_walks.pxd create mode 100644 python/cugraph/sampling/random_walks.py create mode 100644 python/cugraph/sampling/random_walks_wrapper.pyx create mode 100644 python/cugraph/tests/test_random_walks.py diff --git a/README.md b/README.md index 4bdbcd00280..ccc91bfe225 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,7 @@ As of Release 0.18 - including 0.18 nightly | | Breadth First Search (BFS) | Multi-GPU | with cutoff support
[C++ README](cpp/src/traversal/README.md#BFS) | | | Single Source Shortest Path (SSSP) | Multi-GPU | [C++ README](cpp/src/traversal/README.md#SSSP) | | | Traveling Salesperson Problem (TSP) | Single-GPU | | +| Sampling | Random Walks (RW) | Single-GPU | | | Structure | | | | | | Renumbering | Single-GPU | multiple columns, any data type | | | Symmetrize | Multi-GPU | | diff --git a/docs/source/api.rst b/docs/source/api.rst index b02f8f488c5..b9b8ea4859c 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -225,6 +225,17 @@ Overlap Coefficient :undoc-members: +Sampling +======== + +Random Walks +------------ + +.. automodule:: cugraph.sampling.random_walks + :members: + :undoc-members: + + Traversal ========= diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index 11ba2d6ef96..d4632708591 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -101,6 +101,8 @@ from cugraph.raft import raft_include_test from cugraph.comms import comms +from cugraph.sampling import random_walks + # Versioneer from ._version import get_versions diff --git a/python/cugraph/sampling/__init__.py b/python/cugraph/sampling/__init__.py new file mode 100644 index 00000000000..fd9d072d4f8 --- /dev/null +++ b/python/cugraph/sampling/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from cugraph.sampling.random_walks import random_walks diff --git a/python/cugraph/sampling/random_walks.pxd b/python/cugraph/sampling/random_walks.pxd new file mode 100644 index 00000000000..3e0e24b4e98 --- /dev/null +++ b/python/cugraph/sampling/random_walks.pxd @@ -0,0 +1,22 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * + +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + cdef unique_ptr[random_walk_ret_t] call_random_walks[vertex_t, edge_t]( + const handle_t &handle, + const graph_container_t &g, + const vertex_t *ptr_d_start, + edge_t num_paths, + edge_t max_depth) except + diff --git a/python/cugraph/sampling/random_walks.py b/python/cugraph/sampling/random_walks.py new file mode 100644 index 00000000000..7ab3191a07c --- /dev/null +++ b/python/cugraph/sampling/random_walks.py @@ -0,0 +1,95 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import cudf +from cugraph.sampling import random_walks_wrapper +import cugraph +from collections import defaultdict + +# FIXME might be more efficient to return either (df + offset) or 3 cudf.Series + + +def random_walks( + G, + start_vertices, + max_depth=None +): + """ + compute random walks for each nodes in 'start_vertices' + + parameters + ---------- + G : cuGraph.Graph or networkx.Graph + The graph can be either directed (DiGraph) or undirected (Graph). + Weights in the graph are ignored. + Use weight parameter if weights need to be considered + (currently not supported) + + start_vertices : int or list or cudf.Series + A single node or a list or a cudf.Series of nodes from which to run + the random walks + + max_depth : int + The maximum depth of the random walks + + + Returns + ------- + random_walks_edge_lists : cudf.DataFrame + GPU data frame containing all random walks sources identifiers, + destination identifiers, edge weights + + seeds_offsets: cudf.Series + Series containing the starting offset in the returned edge list + for each vertex in start_vertices. + """ + if max_depth is None: + raise TypeError("must specify a 'max_depth'") + + G, _ = cugraph.utilities.check_nx_graph(G) + + if start_vertices is int: + start_vertices = [start_vertices] + + if not isinstance(start_vertices, cudf.Series): + start_vertices = cudf.Series(start_vertices) + + if G.renumbered is True: + start_vertices = G.lookup_internal_vertex_id(start_vertices) + vertex_set, edge_set, sizes = random_walks_wrapper.random_walks( + G, start_vertices, max_depth) + + if G.renumbered: + df_ = cudf.DataFrame() + df_['vertex_set'] = vertex_set + df_ = G.unrenumber(df_, 'vertex_set', preserve_order=True) + vertex_set = cudf.Series(df_['vertex_set']) + + edge_list = defaultdict(list) + next_path_idx = 0 + offsets = [0] + + df = cudf.DataFrame() + for s in sizes.values_host: + for i in range(next_path_idx, s+next_path_idx-1): + edge_list['src'].append(vertex_set.values_host[i]) + edge_list['dst'].append(vertex_set.values_host[i+1]) + next_path_idx += s + df = df.append(edge_list, ignore_index=True) + offsets.append(df.index[-1]+1) + edge_list['src'].clear() + edge_list['dst'].clear() + df['weight'] = edge_set + offsets = cudf.Series(offsets) + + return df, offsets diff --git a/python/cugraph/sampling/random_walks_wrapper.pyx b/python/cugraph/sampling/random_walks_wrapper.pyx new file mode 100644 index 00000000000..7b16ff14018 --- /dev/null +++ b/python/cugraph/sampling/random_walks_wrapper.pyx @@ -0,0 +1,116 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
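
Given the docstring above, a minimal usage sketch and how the returned `(df, offsets)` pair is interpreted; the edge list, seeds, and depth are illustrative.

    import cudf
    import cugraph

    edges = cudf.DataFrame({
        "src":    [0, 1, 1, 2, 2, 3],
        "dst":    [1, 2, 3, 0, 3, 0],
        "weight": [1.0, 1.0, 2.0, 1.0, 0.5, 1.0],
    })
    G = cugraph.Graph()
    G.from_cudf_edgelist(edges, source="src", destination="dst",
                         edge_attr="weight")

    start_vertices = [0, 2]
    df, offsets = cugraph.random_walks(G, start_vertices, max_depth=4)

    # df holds the edges walked (src, dst, weight); offsets[i] is the first
    # row in df of the walk that started at start_vertices[i].
    bounds = offsets.values_host
    for i, seed in enumerate(start_vertices):
        walk = df.iloc[int(bounds[i]):int(bounds[i + 1])]
        print(f"walk from seed {seed}:")
        print(walk)
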
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from cugraph.sampling.random_walks cimport call_random_walks +#from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * +from libcpp cimport bool +from libcpp.utility cimport move +from libc.stdint cimport uintptr_t +from cugraph.structure import graph_primtypes_wrapper +import cudf +import rmm +import numpy as np +import numpy.ctypeslib as ctypeslib +from rmm._lib.device_buffer cimport DeviceBuffer +from cudf.core.buffer import Buffer +from cython.operator cimport dereference as deref +def random_walks(input_graph, start_vertices, max_depth): + """ + Call random_walks + """ + # FIXME: Offsets and indices are currently hardcoded to int, but this may + # not be acceptable in the future. + numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, + np.dtype("int64") : numberTypeEnum.int64Type, + np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} + [src, dst] = [input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']] + vertex_t = src.dtype + edge_t = np.dtype("int32") + weights = None + if input_graph.edgelist.weights: + weights = input_graph.edgelist.edgelist_df['weights'] + num_verts = input_graph.number_of_vertices() + num_edges = input_graph.number_of_edges(directed_edges=True) + num_partition_edges = num_edges + + if num_edges > (2**31 - 1): + edge_t = np.dtype("int64") + cdef unique_ptr[random_walk_ret_t] rw_ret_ptr + + cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + cdef uintptr_t c_edge_weights = NULL + if weights is not None: + c_edge_weights = weights.__cuda_array_interface__['data'][0] + weight_t = weights.dtype + is_weighted = True + else: + weight_t = np.dtype("float32") + is_weighted = False + # Pointers for random_walks + start_vertices = start_vertices.astype('int32') + cdef uintptr_t c_start_vertex_ptr = start_vertices.__cuda_array_interface__['data'][0] + num_paths = start_vertices.size + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get() + cdef graph_container_t graph_container + populate_graph_container(graph_container, + handle_[0], + c_src_vertices, c_dst_vertices, c_edge_weights, + NULL, + ((numberTypeMap[vertex_t])), + ((numberTypeMap[edge_t])), + ((numberTypeMap[weight_t])), + num_partition_edges, + num_verts, + num_edges, + False, + is_weighted, + False, False) + if(vertex_t == np.dtype("int32")): + if(edge_t == np.dtype("int32")): + rw_ret_ptr = move(call_random_walks[int, int]( deref(handle_), + graph_container, + c_start_vertex_ptr, + num_paths, + max_depth)) + else: # (edge_t == np.dtype("int64")): + rw_ret_ptr = move(call_random_walks[int, long]( deref(handle_), + graph_container, + c_start_vertex_ptr, + num_paths, + max_depth)) + else: # (vertex_t == edge_t == np.dtype("int64")): + rw_ret_ptr = move(call_random_walks[long, long]( deref(handle_), + graph_container, + c_start_vertex_ptr, + num_paths, + max_depth)) + + + rw_ret= move(rw_ret_ptr.get()[0]) + vertex_set = 
DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_coalesced_v_)) + edge_set = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_coalesced_w_)) + sizes = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_sizes_)) + vertex_set = Buffer(vertex_set) + edge_set = Buffer(edge_set) + sizes = Buffer(sizes) + + set_vertex = cudf.Series(data=vertex_set, dtype=vertex_t) + set_edge = cudf.Series(data=edge_set, dtype=weight_t) + set_sizes = cudf.Series(data=sizes, dtype=edge_t) + + return set_vertex, set_edge, set_sizes + \ No newline at end of file diff --git a/python/cugraph/structure/graph_utilities.pxd b/python/cugraph/structure/graph_utilities.pxd index b169e42ccf8..c9cf1748bfe 100644 --- a/python/cugraph/structure/graph_utilities.pxd +++ b/python/cugraph/structure/graph_utilities.pxd @@ -83,6 +83,15 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": unique_ptr[device_buffer] dst_indices unique_ptr[device_buffer] edge_data unique_ptr[device_buffer] subgraph_offsets + + cdef cppclass random_walk_ret_t: + size_t coalesced_sz_v_ + size_t coalesced_sz_w_ + size_t num_paths_ + size_t max_depth_ + unique_ptr[device_buffer] d_coalesced_v_ + unique_ptr[device_buffer] d_coalesced_w_ + unique_ptr[device_buffer] d_sizes_ cdef extern from "" namespace "std" nogil: cdef device_buffer move(device_buffer) diff --git a/python/cugraph/tests/test_random_walks.py b/python/cugraph/tests/test_random_walks.py new file mode 100644 index 00000000000..9767e81ba1f --- /dev/null +++ b/python/cugraph/tests/test_random_walks.py @@ -0,0 +1,154 @@ +# Copyright (c) 2020-2021, NVIDIA CORPORATION.: +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc + +import pytest + +from cugraph.tests import utils +import cugraph +import random + + +# ============================================================================= +# Parameters +# ============================================================================= +DIRECTED_GRAPH_OPTIONS = [False, True] +WEIGHTED_GRAPH_OPTIONS = [False, True] +DATASETS = [pytest.param(d) for d in utils.DATASETS] +DATASETS_SMALL = [pytest.param(d) for d in utils.DATASETS_SMALL] + + +def calc_random_walks( + graph_file, + directed=False, + max_depth=None +): + """ + compute random walks for each nodes in 'start_vertices' + + parameters + ---------- + G : cuGraph.Graph or networkx.Graph + The graph can be either directed (DiGraph) or undirected (Graph). + Weights in the graph are ignored. + Use weight parameter if weights need to be considered + (currently not supported) + + start_vertices : int or list or cudf.Series + A single node or a list or a cudf.Series of nodes from which to run + the random walks + + max_depth : int + The maximum depth of the random walks + + + Returns + ------- + random_walks_edge_lists : cudf.DataFrame + GPU data frame containing all random walks sources identifiers, + destination identifiers, edge weights + + seeds_offsets: cudf.Series + Series containing the starting offset in the returned edge list + for each vertex in start_vertices. 
+ """ + G = utils.generate_cugraph_graph_from_file( + graph_file, directed=directed, edgevals=True) + assert G is not None + + k = random.randint(1, 10) + start_vertices = random.sample(range(G.number_of_vertices()), k) + df, offsets = cugraph.random_walks(G, start_vertices, max_depth) + + return df, offsets, start_vertices + + +def check_random_walks(df, offsets, seeds, df_G=None): + invalid_edge = 0 + invalid_seeds = 0 + invalid_weight = 0 + offsets_idx = 0 + for i in range(len(df.index)): + src, dst, weight = df.iloc[i].to_array() + if i == offsets[offsets_idx]: + if df['src'].iloc[i] != seeds[offsets_idx]: + invalid_seeds += 1 + print( + "[ERR] Invalid seed: " + " src {} != src {}" + .format(df['src'].iloc[i], offsets[offsets_idx]) + ) + offsets_idx += 1 + + edge = df.loc[(df['src'] == (src)) & (df['dst'] == (dst))].reset_index( + drop=True) + exp_edge = df_G.loc[ + (df_G['src'] == (src)) & ( + df_G['dst'] == (dst))].reset_index(drop=True) + + if not exp_edge.equals(edge[:1]): + print( + "[ERR] Invalid edge: " + "There is no edge src {} dst {} weight {}" + .format(src, dst, weight) + ) + invalid_weight += 1 + + assert invalid_edge == 0 + assert invalid_seeds == 0 + assert invalid_weight == 0 + +# ============================================================================= +# Pytest Setup / Teardown - called for each test function +# ============================================================================= + + +def prepare_test(): + gc.collect() + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) +@pytest.mark.parametrize("max_depth", [None]) +def test_random_walks_invalid_max_dept( + graph_file, + directed, + max_depth +): + """Test calls random_walks an invalid type""" + prepare_test() + with pytest.raises(TypeError): + df, offsets, seeds = calc_random_walks( + graph_file, + directed=directed, + max_depth=max_depth + ) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) +def test_random_walks( + graph_file, + directed +): + max_depth = random.randint(2, 10) + df_G = utils.read_csv_file(graph_file) + df_G.rename( + columns={"0": "src", "1": "dst", "2": "weight"}, inplace=True) + df, offsets, seeds = calc_random_walks( + graph_file, + directed, + max_depth=max_depth + ) + check_random_walks(df, offsets, seeds, df_G) From e9d09eeb11414c2e12c46b4a188186e1ceee032d Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Thu, 8 Apr 2021 21:26:13 -0500 Subject: [PATCH 49/51] fix mg_renumber non-deterministic errors (#1523) * @Iroy30 added missing dask `persist()` call to ensure deterministic indirection map state prior to merging renumbering results. * @rlratzel updated MG renumbering test for latest API changes, removed redundant test, and updated test IDs to include the dataset name. 
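
The core fix is the added `persist()` on the indirection map in `number_map.py` (see the diff below), which materializes it once so the later merges operate on a consistent state. The reworked `test_mg_renumber` then checks a renumber/unrenumber round trip roughly like the sketch that follows; the toy edge list is illustrative, and a Dask client plus cugraph comms are assumed to be initialized as in the test fixture.

    import cudf
    import dask.dataframe
    from cugraph.structure.number_map import NumberMap

    gdf = cudf.DataFrame({
        "src_old": [0, 1, 2], "dst_old": [1, 2, 0],
        "src": [1000, 1001, 1002], "dst": [1001, 1002, 1000],
    })
    ddf = dask.dataframe.from_pandas(gdf, npartitions=2)

    # preserve_order is not supported for MG, so sort before comparing the
    # unrenumbered result against the original input.
    renumbered_df, renumber_map = NumberMap.renumber(
        ddf, ["src", "src_old"], ["dst", "dst_old"], preserve_order=False)
    unrenumbered_df = renumber_map.unrenumber(
        renumbered_df, "src", preserve_order=False)
    unrenumbered_df = renumber_map.unrenumber(
        unrenumbered_df, "dst", preserve_order=False)
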
Authors: - https://github.com/Iroy30 - Rick Ratzel (https://github.com/rlratzel) Approvers: - Brad Rees (https://github.com/BradReesWork) - Joseph Nke (https://github.com/jnke2016) URL: https://github.com/rapidsai/cugraph/pull/1523 --- python/cugraph/structure/number_map.py | 8 +- .../test_mg_batch_betweenness_centrality.py | 3 +- ...st_mg_batch_edge_betweenness_centrality.py | 5 +- python/cugraph/tests/dask/test_mg_bfs.py | 5 +- python/cugraph/tests/dask/test_mg_comms.py | 6 +- python/cugraph/tests/dask/test_mg_degree.py | 5 +- .../tests/dask/test_mg_katz_centrality.py | 5 +- python/cugraph/tests/dask/test_mg_louvain.py | 7 +- python/cugraph/tests/dask/test_mg_pagerank.py | 5 +- python/cugraph/tests/dask/test_mg_renumber.py | 93 ++++++------------- .../cugraph/tests/dask/test_mg_replication.py | 46 ++++++--- python/cugraph/tests/dask/test_mg_sssp.py | 5 +- python/cugraph/tests/dask/test_mg_utility.py | 5 +- 13 files changed, 107 insertions(+), 91 deletions(-) diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index e45a50d6dbe..cd24dfc0434 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -263,7 +263,6 @@ def indirection_map(self, ddf, src_col_names, dst_col_names): to_frame(name=newname) else: tmp_df[newname] = tmp[newname].append(tmp_dst[oldname]) - print(tmp_df.columns) else: for newname in self.col_names: tmp_df[newname] = tmp[newname] @@ -273,7 +272,7 @@ def indirection_map(self, ddf, src_col_names, dst_col_names): tmp_ddf = tmp_ddf.assign(idx=1) tmp_ddf['global_id'] = tmp_ddf.idx.cumsum() - 1 tmp_ddf = tmp_ddf.drop(columns='idx') - + tmp_ddf = tmp_ddf.persist() self.ddf = tmp_ddf return tmp_ddf @@ -481,8 +480,6 @@ def renumber(df, src_col_names, dst_col_names, preserve_order=False, renumber_type = 'legacy' else: renumber_type = 'experimental' - df = df.rename(columns={src_col_names: "src", - dst_col_names: "dst"}) renumber_map = NumberMap() if not isinstance(src_col_names, list): @@ -514,6 +511,9 @@ def renumber(df, src_col_names, dst_col_names, preserve_order=False, df, "dst", dst_col_names, drop=True, preserve_order=preserve_order ) + else: + df = df.rename(columns={src_col_names[0]: "src", + dst_col_names[0]: "dst"}) num_edges = len(df) diff --git a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py index 6e1e5ea380a..02696f589e3 100644 --- a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py @@ -51,7 +51,8 @@ @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DATASETS, + ids=[f"dataset={d.as_posix()}" for d in DATASETS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) diff --git a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py index 54b58c340aa..89844797807 100644 --- a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -48,7 +48,8 @@ @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("graph_file", DATASETS, + ids=[f"dataset={d}" for d in DATASETS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) diff --git a/python/cugraph/tests/dask/test_mg_bfs.py b/python/cugraph/tests/dask/test_mg_bfs.py index 63580461b17..36d1f436b52 100644 --- a/python/cugraph/tests/dask/test_mg_bfs.py +++ b/python/cugraph/tests/dask/test_mg_bfs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -35,7 +35,10 @@ def client_connection(): def test_dask_bfs(client_connection): gc.collect() + # FIXME: update this to allow dataset to be parameterized and have dataset + # part of test param id (see other tests) input_data_path = r"../datasets/netscience.csv" + print(f"dataset={input_data_path}") chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( diff --git a/python/cugraph/tests/dask/test_mg_comms.py b/python/cugraph/tests/dask/test_mg_comms.py index 61a4944b5f1..03a0a5d73d2 100644 --- a/python/cugraph/tests/dask/test_mg_comms.py +++ b/python/cugraph/tests/dask/test_mg_comms.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -38,10 +38,14 @@ def test_dask_pagerank(client_connection): # Initialize and run pagerank on two distributed graphs # with same communicator + # FIXME: update this to allow dataset to be parameterized and have dataset + # part of test param id (see other tests) input_data_path1 = r"../datasets/karate.csv" + print(f"dataset1={input_data_path1}") chunksize1 = dcg.get_chunksize(input_data_path1) input_data_path2 = r"../datasets/dolphins.csv" + print(f"dataset2={input_data_path2}") chunksize2 = dcg.get_chunksize(input_data_path2) ddf1 = dask_cudf.read_csv( diff --git a/python/cugraph/tests/dask/test_mg_degree.py b/python/cugraph/tests/dask/test_mg_degree.py index 9f4c0d94319..93e8a365dea 100644 --- a/python/cugraph/tests/dask/test_mg_degree.py +++ b/python/cugraph/tests/dask/test_mg_degree.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -34,7 +34,10 @@ def client_connection(): def test_dask_mg_degree(client_connection): gc.collect() + # FIXME: update this to allow dataset to be parameterized and have dataset + # part of test param id (see other tests) input_data_path = r"../datasets/karate.csv" + print(f"dataset={input_data_path}") chunksize = cugraph.dask.get_chunksize(input_data_path) diff --git a/python/cugraph/tests/dask/test_mg_katz_centrality.py b/python/cugraph/tests/dask/test_mg_katz_centrality.py index 631457f7558..eadf0f662d4 100644 --- a/python/cugraph/tests/dask/test_mg_katz_centrality.py +++ b/python/cugraph/tests/dask/test_mg_katz_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -36,7 +36,10 @@ def client_connection(): def test_dask_katz_centrality(client_connection): gc.collect() + # FIXME: update this to allow dataset to be parameterized and have dataset + # part of test param id (see other tests) input_data_path = r"../datasets/karate.csv" + print(f"dataset={input_data_path}") chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( diff --git a/python/cugraph/tests/dask/test_mg_louvain.py b/python/cugraph/tests/dask/test_mg_louvain.py index a07eede8cb9..bd7374fb75e 100644 --- a/python/cugraph/tests/dask/test_mg_louvain.py +++ b/python/cugraph/tests/dask/test_mg_louvain.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -52,7 +52,10 @@ def client_connection(): @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.fixture(scope="module", params=utils.DATASETS_UNDIRECTED) +@pytest.fixture(scope="module", + params=utils.DATASETS_UNDIRECTED, + ids=[f"dataset={d.as_posix()}" + for d in utils.DATASETS_UNDIRECTED]) def daskGraphFromDataset(request, client_connection): """ Returns a new dask dataframe created from the dataset file param. diff --git a/python/cugraph/tests/dask/test_mg_pagerank.py b/python/cugraph/tests/dask/test_mg_pagerank.py index 4f0b45242dd..9cb00010311 100644 --- a/python/cugraph/tests/dask/test_mg_pagerank.py +++ b/python/cugraph/tests/dask/test_mg_pagerank.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -65,7 +65,10 @@ def client_connection(): def test_dask_pagerank(client_connection, personalization_perc): gc.collect() + # FIXME: update this to allow dataset to be parameterized and have dataset + # part of test param id (see other tests) input_data_path = r"../datasets/karate.csv" + print(f"dataset={input_data_path}") chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( diff --git a/python/cugraph/tests/dask/test_mg_renumber.py b/python/cugraph/tests/dask/test_mg_renumber.py index 7f5cf6f08bc..68ec3de35f8 100644 --- a/python/cugraph/tests/dask/test_mg_renumber.py +++ b/python/cugraph/tests/dask/test_mg_renumber.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -38,11 +38,12 @@ def client_connection(): teardown_local_dask_cluster(cluster, client) -# Test all combinations of default/managed and pooled/non-pooled allocation @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED, + ids=[f"dataset={d.as_posix()}" + for d in utils.DATASETS_UNRENUMBERED]) def test_mg_renumber(graph_file, client_connection): gc.collect() @@ -60,71 +61,37 @@ def test_mg_renumber(graph_file, client_connection): ddf = dask.dataframe.from_pandas(gdf, npartitions=2) - numbering = NumberMap() - numbering.from_dataframe(ddf, ["src", "src_old"], ["dst", "dst_old"]) - renumbered_df = numbering.add_internal_vertex_id( - numbering.add_internal_vertex_id(ddf, "src_id", ["src", "src_old"]), - "dst_id", - ["dst", "dst_old"], - ) - - check_src = numbering.from_internal_vertex_id( - renumbered_df, "src_id" - ).compute() - check_dst = numbering.from_internal_vertex_id( - renumbered_df, "dst_id" - ).compute() - - assert check_src["0"].to_pandas().equals(check_src["src"].to_pandas()) - assert check_src["1"].to_pandas().equals(check_src["src_old"].to_pandas()) - assert check_dst["0"].to_pandas().equals(check_dst["dst"].to_pandas()) - assert check_dst["1"].to_pandas().equals(check_dst["dst_old"].to_pandas()) - - -# Test all combinations of default/managed and pooled/non-pooled allocation -@pytest.mark.skipif( - is_single_gpu(), reason="skipping MG testing on Single GPU system" -) -@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) -def test_mg_renumber2(graph_file, client_connection): - gc.collect() - - M = utils.read_csv_for_nx(graph_file) - sources = cudf.Series(M["0"]) - destinations = cudf.Series(M["1"]) - - translate = 1000 - - gdf = cudf.DataFrame() - gdf["src_old"] = sources - gdf["dst_old"] = destinations - gdf["src"] = sources + translate - gdf["dst"] = destinations + translate - gdf["weight"] = gdf.index.astype(np.float) - - ddf = dask.dataframe.from_pandas(gdf, npartitions=2) - - ren2, num2 = NumberMap.renumber( - ddf, ["src", "src_old"], ["dst", "dst_old"] - ) - - check_src = num2.from_internal_vertex_id(ren2, "src").compute() - check_src = check_src.sort_values("weight").reset_index(drop=True) - check_dst = num2.from_internal_vertex_id(ren2, "dst").compute() - check_dst = check_dst.sort_values("weight").reset_index(drop=True) - - assert check_src["0"].to_pandas().equals(gdf["src"].to_pandas()) - assert 
check_src["1"].to_pandas().equals(gdf["src_old"].to_pandas()) - assert check_dst["0"].to_pandas().equals(gdf["dst"].to_pandas()) - assert check_dst["1"].to_pandas().equals(gdf["dst_old"].to_pandas()) + # preserve_order is not supported for MG + renumbered_df, renumber_map = NumberMap.renumber(ddf, + ["src", "src_old"], + ["dst", "dst_old"], + preserve_order=False) + unrenumbered_df = renumber_map.unrenumber(renumbered_df, "src", + preserve_order=False) + unrenumbered_df = renumber_map.unrenumber(unrenumbered_df, "dst", + preserve_order=False) + + # sort needed only for comparisons, since preserve_order is False + gdf = gdf.sort_values(by=["src", "src_old", "dst", "dst_old"]) + gdf = gdf.reset_index() + unrenumbered_df = unrenumbered_df.compute() + unrenumbered_df = unrenumbered_df.sort_values(by=["0_src", "1_src", + "0_dst", "1_dst"]) + unrenumbered_df = unrenumbered_df.reset_index() + + assert gdf["src"].equals(unrenumbered_df["0_src"]) + assert gdf["src_old"].equals(unrenumbered_df["1_src"]) + assert gdf["dst"].equals(unrenumbered_df["0_dst"]) + assert gdf["dst_old"].equals(unrenumbered_df["1_dst"]) -# Test all combinations of default/managed and pooled/non-pooled allocation @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) -def test_mg_renumber3(graph_file, client_connection): +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED, + ids=[f"dataset={d.as_posix()}" + for d in utils.DATASETS_UNRENUMBERED]) +def test_mg_renumber_add_internal_vertex_id(graph_file, client_connection): gc.collect() M = utils.read_csv_for_nx(graph_file) diff --git a/python/cugraph/tests/dask/test_mg_replication.py b/python/cugraph/tests/dask/test_mg_replication.py index bb43d6c0f7a..3974cf9ed82 100644 --- a/python/cugraph/tests/dask/test_mg_replication.py +++ b/python/cugraph/tests/dask/test_mg_replication.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -34,7 +34,9 @@ @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS) +@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_replicate_cudf_dataframe_with_weights( input_data_path, mg_device_count @@ -60,7 +62,9 @@ def test_replicate_cudf_dataframe_with_weights( @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS) +@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_replicate_cudf_dataframe_no_weights(input_data_path, mg_device_count): gc.collect() @@ -84,7 +88,9 @@ def test_replicate_cudf_dataframe_no_weights(input_data_path, mg_device_count): @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS) +@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_replicate_cudf_series(input_data_path, mg_device_count): gc.collect() @@ -114,7 +120,9 @@ def test_replicate_cudf_series(input_data_path, mg_device_count): @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) +@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_enable_batch_no_context(graph_file, directed, mg_device_count): @@ -129,7 +137,9 @@ def test_enable_batch_no_context(graph_file, directed, mg_device_count): @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) +@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_enable_batch_no_context_view_adj( @@ -145,7 +155,9 @@ def test_enable_batch_no_context_view_adj( @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) +@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_enable_batch_context_then_views( @@ -174,7 +186,9 @@ def test_enable_batch_context_then_views( @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) +@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", 
MG_DEVICE_COUNT_OPTIONS) def test_enable_batch_view_then_context(graph_file, directed, mg_device_count): @@ -205,7 +219,9 @@ def test_enable_batch_view_then_context(graph_file, directed, mg_device_count): @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) +@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_enable_batch_context_no_context_views( @@ -230,7 +246,9 @@ def test_enable_batch_context_no_context_views( @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) +@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_enable_batch_edgelist_replication( @@ -251,7 +269,9 @@ def test_enable_batch_edgelist_replication( @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) +@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_enable_batch_adjlist_replication_weights( @@ -293,7 +313,9 @@ def test_enable_batch_adjlist_replication_weights( @pytest.mark.skipif( is_single_gpu(), reason="skipping MG testing on Single GPU system" ) -@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) +@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS, + ids=[f"dataset={d.as_posix()}" + for d in DATASETS_OPTIONS]) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) def test_enable_batch_adjlist_replication_no_weights( diff --git a/python/cugraph/tests/dask/test_mg_sssp.py b/python/cugraph/tests/dask/test_mg_sssp.py index d75d76d7fd4..9e1fd1ec82f 100644 --- a/python/cugraph/tests/dask/test_mg_sssp.py +++ b/python/cugraph/tests/dask/test_mg_sssp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -35,7 +35,10 @@ def client_connection(): def test_dask_sssp(client_connection): gc.collect() + # FIXME: update this to allow dataset to be parameterized and have dataset + # part of test param id (see other tests) input_data_path = r"../datasets/netscience.csv" + print(f"dataset={input_data_path}") chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( diff --git a/python/cugraph/tests/dask/test_mg_utility.py b/python/cugraph/tests/dask/test_mg_utility.py index 3217c1bef1a..150fa0137f5 100644 --- a/python/cugraph/tests/dask/test_mg_utility.py +++ b/python/cugraph/tests/dask/test_mg_utility.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. 
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -46,7 +46,10 @@ def client_connection():
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
 def test_from_edgelist(client_connection):
+    # FIXME: update this to allow dataset to be parameterized and have dataset
+    # part of test param id (see other tests)
     input_data_path = r"../datasets/karate.csv"
+    print(f"dataset={input_data_path}")
     chunksize = dcg.get_chunksize(input_data_path)
     ddf = dask_cudf.read_csv(
         input_data_path,

From 62c1c6824ab9f4249ed227cb4954076d282d3b57 Mon Sep 17 00:00:00 2001
From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com>
Date: Mon, 12 Apr 2021 08:39:03 -0500
Subject: [PATCH 50/51] Fixed copyright date and format. (#1526)

Update copyright date and format

Authors:
  - Rick Ratzel (https://github.com/rlratzel)

Approvers:
  - Brad Rees (https://github.com/BradReesWork)

URL: https://github.com/rapidsai/cugraph/pull/1526
---
 docs/source/conf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index eb4745a61f0..3422428c96b 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 #
-# Copyright (c) 2018-2020 NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
 #
 # pygdf documentation build configuration file, created by
 # sphinx-quickstart on Wed May 3 10:59:22 2017.

From 4a26dd772ded0fc0596d643d5cf268dfa2e0f53a Mon Sep 17 00:00:00 2001
From: Raymond Douglass
Date: Wed, 21 Apr 2021 12:12:18 -0400
Subject: [PATCH 51/51] update changelog

---
 CHANGELOG.md | 67 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 64 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0011b99fbf3..5c72b095a04 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,67 @@
-# cuGraph 0.19.0 (Date TBD)
-
-Please see https://github.com/rapidsai/cugraph/releases/tag/v0.19.0a for the latest changes to this development branch.
+# cuGraph 0.19.0 (21 Apr 2021)
+
+## 🐛 Bug Fixes
+
+- Fixed copyright date and format ([#1526](https://github.com//rapidsai/cugraph/pull/1526)) [@rlratzel](https://github.com/rlratzel)
+- fix mg_renumber non-deterministic errors ([#1523](https://github.com//rapidsai/cugraph/pull/1523)) [@Iroy30](https://github.com/Iroy30)
+- Updated NetworkX version to 2.5.1 ([#1510](https://github.com//rapidsai/cugraph/pull/1510)) [@rlratzel](https://github.com/rlratzel)
+- pascal renumbering fix ([#1505](https://github.com//rapidsai/cugraph/pull/1505)) [@Iroy30](https://github.com/Iroy30)
+- Fix MNMG test failures and skip tests that are not supported on Pascal ([#1498](https://github.com//rapidsai/cugraph/pull/1498)) [@jnke2016](https://github.com/jnke2016)
+- Revert "Update conda recipes pinning of repo dependencies" ([#1493](https://github.com//rapidsai/cugraph/pull/1493)) [@raydouglass](https://github.com/raydouglass)
+- Update conda recipes pinning of repo dependencies ([#1485](https://github.com//rapidsai/cugraph/pull/1485)) [@mike-wendt](https://github.com/mike-wendt)
+- Update to make notebook_list.py compatible with numba 0.53 ([#1455](https://github.com//rapidsai/cugraph/pull/1455)) [@rlratzel](https://github.com/rlratzel)
+- Fix bugs in copy_v_transform_reduce_key_aggregated_out_nbr & groupby_gpuid_and_shuffle ([#1434](https://github.com//rapidsai/cugraph/pull/1434)) [@seunghwak](https://github.com/seunghwak)
+- update default path of setup to use the new directory paths in build … ([#1425](https://github.com//rapidsai/cugraph/pull/1425)) [@ChuckHastings](https://github.com/ChuckHastings)
+
+## 📖 Documentation
+
+- Create C++ documentation ([#1489](https://github.com//rapidsai/cugraph/pull/1489)) [@ChuckHastings](https://github.com/ChuckHastings)
+- Create cuGraph developers guide ([#1431](https://github.com//rapidsai/cugraph/pull/1431)) [@ChuckHastings](https://github.com/ChuckHastings)
+- Add boost 1.0 license file. ([#1401](https://github.com//rapidsai/cugraph/pull/1401)) [@seunghwak](https://github.com/seunghwak)
+
+## 🚀 New Features
+
+- Implement C/CUDA RandomWalks functionality ([#1439](https://github.com//rapidsai/cugraph/pull/1439)) [@aschaffer](https://github.com/aschaffer)
+- Add R-mat generator ([#1411](https://github.com//rapidsai/cugraph/pull/1411)) [@seunghwak](https://github.com/seunghwak)
+
+## 🛠️ Improvements
+
+- Random Walks - Python Bindings ([#1516](https://github.com//rapidsai/cugraph/pull/1516)) [@jnke2016](https://github.com/jnke2016)
+- Updating RAFT tag ([#1509](https://github.com//rapidsai/cugraph/pull/1509)) [@afender](https://github.com/afender)
+- Clean up nullptr cuda_stream_view arguments ([#1504](https://github.com//rapidsai/cugraph/pull/1504)) [@hlinsen](https://github.com/hlinsen)
+- Reduce the size of the cugraph libraries ([#1503](https://github.com//rapidsai/cugraph/pull/1503)) [@robertmaynard](https://github.com/robertmaynard)
+- Add indirection and replace algorithms with new renumbering ([#1484](https://github.com//rapidsai/cugraph/pull/1484)) [@Iroy30](https://github.com/Iroy30)
+- Multiple graph generator with power law distribution on sizes ([#1483](https://github.com//rapidsai/cugraph/pull/1483)) [@afender](https://github.com/afender)
+- TSP solver bug fix ([#1480](https://github.com//rapidsai/cugraph/pull/1480)) [@hlinsen](https://github.com/hlinsen)
+- Added cmake function and .hpp template for generating version_config.hpp file. ([#1476](https://github.com//rapidsai/cugraph/pull/1476)) [@rlratzel](https://github.com/rlratzel)
+- Fix for bug in SCC on self-loops ([#1475](https://github.com//rapidsai/cugraph/pull/1475)) [@aschaffer](https://github.com/aschaffer)
+- MS BFS python APIs + EgoNet updates ([#1469](https://github.com//rapidsai/cugraph/pull/1469)) [@afender](https://github.com/afender)
+- Removed unused dependencies from libcugraph recipe, moved non-test script code from test script to gpu build script ([#1468](https://github.com//rapidsai/cugraph/pull/1468)) [@rlratzel](https://github.com/rlratzel)
+- Remove literals passed to `device_uvector::set_element_async` ([#1453](https://github.com//rapidsai/cugraph/pull/1453)) [@harrism](https://github.com/harrism)
+- ENH Change conda build directories to work with ccache ([#1452](https://github.com//rapidsai/cugraph/pull/1452)) [@dillon-cullinan](https://github.com/dillon-cullinan)
+- Updating docs ([#1448](https://github.com//rapidsai/cugraph/pull/1448)) [@BradReesWork](https://github.com/BradReesWork)
+- Improve graph primitives performance on graphs with widely varying vertex degrees ([#1447](https://github.com//rapidsai/cugraph/pull/1447)) [@seunghwak](https://github.com/seunghwak)
+- Update Changelog Link ([#1446](https://github.com//rapidsai/cugraph/pull/1446)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Updated NCCL to version 2.8.4 ([#1445](https://github.com//rapidsai/cugraph/pull/1445)) [@BradReesWork](https://github.com/BradReesWork)
+- Update FAISS to 1.7.0 ([#1444](https://github.com//rapidsai/cugraph/pull/1444)) [@BradReesWork](https://github.com/BradReesWork)
+- Update graph partitioning scheme ([#1443](https://github.com//rapidsai/cugraph/pull/1443)) [@seunghwak](https://github.com/seunghwak)
+- Add additional datasets to improve coverage ([#1441](https://github.com//rapidsai/cugraph/pull/1441)) [@jnke2016](https://github.com/jnke2016)
+- Update C++ MG PageRank and SG PageRank, Katz Centrality, BFS, and SSSP to use the new R-mat graph generator ([#1438](https://github.com//rapidsai/cugraph/pull/1438)) [@seunghwak](https://github.com/seunghwak)
+- Remove raft handle duplication ([#1436](https://github.com//rapidsai/cugraph/pull/1436)) [@Iroy30](https://github.com/Iroy30)
+- Streams infra + support in egonet ([#1435](https://github.com//rapidsai/cugraph/pull/1435)) [@afender](https://github.com/afender)
+- Prepare Changelog for Automation ([#1433](https://github.com//rapidsai/cugraph/pull/1433)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Update 0.18 changelog entry ([#1429](https://github.com//rapidsai/cugraph/pull/1429)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Update and Test Renumber bindings ([#1427](https://github.com//rapidsai/cugraph/pull/1427)) [@Iroy30](https://github.com/Iroy30)
+- Update Louvain to use new graph primitives and pattern accelerators ([#1423](https://github.com//rapidsai/cugraph/pull/1423)) [@ChuckHastings](https://github.com/ChuckHastings)
+- Replace rmm::device_vector & thrust::host_vector with rmm::device_uvector & std::vector, respectively. ([#1421](https://github.com//rapidsai/cugraph/pull/1421)) [@seunghwak](https://github.com/seunghwak)
+- Update C++ MG PageRank test ([#1419](https://github.com//rapidsai/cugraph/pull/1419)) [@seunghwak](https://github.com/seunghwak)
+- ENH Build with `cmake --build` & Pass ccache variables to conda recipe & use Ninja in CI ([#1415](https://github.com//rapidsai/cugraph/pull/1415)) [@Ethyling](https://github.com/Ethyling)
+- Adding new primitives: copy_v_transform_reduce_key_aggregated_out_nbr & transform_reduce_by_adj_matrix_row|col_key_e bug fixes ([#1399](https://github.com//rapidsai/cugraph/pull/1399)) [@seunghwak](https://github.com/seunghwak)
+- Add new primitives: compute_in|out_degrees, compute_in|out_weight_sums to graph_view_t ([#1394](https://github.com//rapidsai/cugraph/pull/1394)) [@seunghwak](https://github.com/seunghwak)
+- Rename sort_and_shuffle to groupby_gpuid_and_shuffle ([#1392](https://github.com//rapidsai/cugraph/pull/1392)) [@seunghwak](https://github.com/seunghwak)
+- Matching updates for RAFT comms updates (device_sendrecv, device_multicast_sendrecv, gather, gatherv) ([#1391](https://github.com//rapidsai/cugraph/pull/1391)) [@seunghwak](https://github.com/seunghwak)
+- Fix forward-merge conflicts for #1370 ([#1377](https://github.com//rapidsai/cugraph/pull/1377)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Add utility function for computing a secondary cost for BFS and SSSP output ([#1376](https://github.com//rapidsai/cugraph/pull/1376)) [@hlinsen](https://github.com/hlinsen)

 # cuGraph 0.18.0 (24 Feb 2021)