From d49e5e6a3575d4ccc5b629be85c0141e8a3c60d7 Mon Sep 17 00:00:00 2001 From: Naim Date: Mon, 25 Mar 2024 06:01:31 +0100 Subject: [PATCH 1/4] debug bellman_fod --- cpp/CMakeLists.txt | 2 + cpp/src/traversal/bellman_ford_impl.cuh | 408 ++++++++++++++++++++++++ cpp/src/traversal/bellman_ford_mg.cu | 68 ++++ cpp/src/traversal/bellman_ford_sg.cu | 68 ++++ 4 files changed, 546 insertions(+) create mode 100644 cpp/src/traversal/bellman_ford_impl.cuh create mode 100644 cpp/src/traversal/bellman_ford_mg.cu create mode 100644 cpp/src/traversal/bellman_ford_sg.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6070621134d..540a784b129 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -280,6 +280,8 @@ set(CUGRAPH_SOURCES src/traversal/bfs_sg.cu src/traversal/bfs_mg.cu src/traversal/sssp_sg.cu + src/traversal/bellman_ford_sg.cu + src/traversal/bellman_ford_mg.cu src/traversal/od_shortest_distances_sg.cu src/traversal/sssp_mg.cu src/link_analysis/hits_sg.cu diff --git a/cpp/src/traversal/bellman_ford_impl.cuh b/cpp/src/traversal/bellman_ford_impl.cuh new file mode 100644 index 00000000000..c7bf56b208f --- /dev/null +++ b/cpp/src/traversal/bellman_ford_impl.cuh @@ -0,0 +1,408 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "prims/fill_edge_property.cuh" +#include "prims/reduce_op.cuh" +#include "prims/transform_e.cuh" +#include "prims/transform_reduce_e_by_src_dst_key.cuh" +#include "prims/update_edge_src_dst_property.cuh" + +#include +#include +#include + +#include + +#include + +namespace cugraph { + +namespace detail { + +template +void bellman_ford(raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + vertex_t source, + vertex_t* predecessors, + weight_t* distances) +{ + using graph_view_t = cugraph::graph_view_t; + graph_view_t current_graph_view(graph_view); + + // edge mask + cugraph::edge_property_t edge_masks_even(handle, current_graph_view); + cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_even); + + cugraph::edge_property_t edge_masks_odd(handle, current_graph_view); + cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_odd); + + cugraph::transform_e( + handle, + current_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + return !(src == dst); // mask out self-loop + }, + edge_masks_even.mutable_view()); + + current_graph_view.attach_edge_mask(edge_masks_even.view()); + + // 2. initialize distances and predecessors + + auto constexpr invalid_distance = std::numeric_limits::max(); + auto constexpr invalid_vertex = invalid_vertex_id::value; + + auto val_first = thrust::make_zip_iterator(thrust::make_tuple(distances, predecessors)); + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(current_graph_view.local_vertex_partition_range_first()), + thrust::make_counting_iterator(current_graph_view.local_vertex_partition_range_last()), + val_first, + [source] __device__(auto v) { + auto distance = invalid_distance; + if (v == source) { distance = weight_t{0.0}; } + return thrust::make_tuple(distance, invalid_vertex); + }); + + edge_src_property_t src_predecessor_cache(handle); + edge_src_property_t src_distance_cache(handle); + + edge_dst_property_t dst_predecessor_cache(handle); + edge_dst_property_t dst_distance_cache(handle); + + edge_dst_property_t dst_key_cache(handle); + + rmm::device_uvector local_vertices( + current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); + + detail::sequence_fill(handle.get_stream(), + local_vertices.begin(), + local_vertices.size(), + current_graph_view.local_vertex_partition_range_first()); + + // auto vertex_begin = + // thrust::make_counting_iterator(current_graph_view.local_vertex_partition_range_first()); + // auto vertex_end = + // thrust::make_counting_iterator(current_graph_view.local_vertex_partition_range_last()); + + // thrust::copy(handle.get_thrust_policy(), vertex_begin, vertex_end, local_vertices.begin()); + + vertex_t itr_cnt = 0; + for (itr_cnt = 0; itr_cnt <= current_graph_view.number_of_vertices(); itr_cnt++) { + if constexpr (graph_view_t::is_multi_gpu) { + src_predecessor_cache = + edge_src_property_t(handle, current_graph_view); + src_distance_cache = edge_src_property_t(handle, current_graph_view); + + dst_predecessor_cache = + edge_dst_property_t(handle, current_graph_view); + dst_distance_cache = edge_dst_property_t(handle, current_graph_view); + + dst_key_cache = edge_dst_property_t(handle, current_graph_view); + + update_edge_src_property(handle, current_graph_view, predecessors, src_predecessor_cache); + update_edge_src_property(handle, current_graph_view, distances, src_distance_cache); + + update_edge_dst_property(handle, current_graph_view, predecessors, dst_predecessor_cache); + update_edge_dst_property(handle, current_graph_view, distances, dst_distance_cache); + + update_edge_dst_property(handle, current_graph_view, local_vertices.begin(), dst_key_cache); + } + + auto src_input_property_values = + graph_view_t::is_multi_gpu + ? view_concat(src_predecessor_cache.view(), src_distance_cache.view()) + : view_concat(detail::edge_major_property_view_t(predecessors), + detail::edge_major_property_view_t(distances)); + + auto dst_input_property_values = + graph_view_t::is_multi_gpu + ? view_concat(dst_predecessor_cache.view(), dst_distance_cache.view()) + : view_concat( + detail::edge_minor_property_view_t(predecessors, + vertex_t{0}), + detail::edge_minor_property_view_t(distances, weight_t{0})); + + if (graph_view_t::is_multi_gpu) { + auto const comm_rank = handle.get_comms().get_rank(); + auto const comm_size = handle.get_comms().get_size(); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + auto local_vertices_title = std::string("local_vertices_").append(std::to_string(comm_rank)); + raft::print_device_vector( + local_vertices_title.c_str(), local_vertices.begin(), local_vertices.size(), std::cout); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + auto distances_title = std::string("distances_").append(std::to_string(comm_rank)); + raft::print_device_vector(distances_title.c_str(), + distances, + current_graph_view.local_vertex_partition_range_size(), + std::cout); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + auto predecessors_title = std::string("predecessors_").append(std::to_string(comm_rank)); + raft::print_device_vector(predecessors_title.c_str(), + predecessors, + current_graph_view.local_vertex_partition_range_size(), + std::cout); + } + + rmm::device_uvector edge_reduced_dst_keys(0, handle.get_stream()); + rmm::device_uvector minimum_weights(0, handle.get_stream()); + rmm::device_uvector closest_preds(0, handle.get_stream()); + + std::forward_as_tuple(edge_reduced_dst_keys, std::tie(minimum_weights, closest_preds)) = + cugraph::transform_reduce_e_by_dst_key( + handle, + current_graph_view, + src_input_property_values, + dst_input_property_values, + *edge_weight_view, + graph_view_t::is_multi_gpu ? dst_key_cache.view() + : detail::edge_minor_property_view_t( + local_vertices.begin(), vertex_t{0}), + [] __device__(auto src, + auto dst, + thrust::tuple src_pred_dist, + thrust::tuple dst_pred_dist, + auto wt) { + auto src_pred = thrust::get<0>(src_pred_dist); + auto src_dist = thrust::get<1>(src_pred_dist); + + auto dst_pred = thrust::get<0>(dst_pred_dist); + auto dst_dist = thrust::get<1>(dst_pred_dist); + + printf( + "src = %d dst = %d wt = %f src_pred = %d dst_pred = %d, src_dist = %f dst_dist = %f\n", + static_cast(src), + static_cast(dst), + static_cast(wt), + static_cast(src_pred), + static_cast(dst_pred), + static_cast(src_dist), + static_cast(dst_dist)); + + auto relax = (src_dist < invalid_distance) && + ((dst_dist == invalid_distance) || (dst_dist > (src_dist + wt))); + + return relax ? thrust::make_tuple(src_dist + wt, src) + : thrust::make_tuple(invalid_distance, invalid_vertex); + }, + thrust::make_tuple(invalid_distance, invalid_vertex), + reduce_op::minimum>{}, + true); + + if constexpr (graph_view_t::is_multi_gpu) { + auto vertex_partition_range_lasts = current_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts( + vertex_partition_range_lasts.size(), handle.get_stream()); + + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto func = cugraph::detail::compute_gpu_id_from_int_vertex_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + major_comm_size, + minor_comm_size}; + + rmm::device_uvector d_tx_value_counts(0, handle.get_stream()); + + auto triplet_first = thrust::make_zip_iterator( + edge_reduced_dst_keys.begin(), minimum_weights.begin(), closest_preds.begin()); + + d_tx_value_counts = cugraph::groupby_and_count( + triplet_first, + triplet_first + edge_reduced_dst_keys.size(), + [func] __device__(auto val) { return func(thrust::get<0>(val)); }, + handle.get_comms().get_size(), + std::numeric_limits::max(), + handle.get_stream()); + + std::vector h_tx_value_counts(d_tx_value_counts.size()); + raft::update_host(h_tx_value_counts.data(), + d_tx_value_counts.data(), + d_tx_value_counts.size(), + handle.get_stream()); + handle.sync_stream(); + + std::forward_as_tuple(std::tie(edge_reduced_dst_keys, minimum_weights, closest_preds), + std::ignore) = + shuffle_values( + handle.get_comms(), + thrust::make_zip_iterator( + edge_reduced_dst_keys.begin(), minimum_weights.begin(), closest_preds.begin()), + h_tx_value_counts, + handle.get_stream()); + } + + using flag_t = uint8_t; + rmm::device_uvector updated = rmm::device_uvector( + current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); + + thrust::fill(handle.get_thrust_policy(), updated.begin(), updated.end(), flag_t{false}); + + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple( + edge_reduced_dst_keys.begin(), minimum_weights.begin(), closest_preds.begin())), + thrust::make_zip_iterator(thrust::make_tuple( + edge_reduced_dst_keys.end(), minimum_weights.end(), closest_preds.end())), + [distances, + predecessors, + updated = updated.begin(), + v_first = current_graph_view.local_vertex_partition_range_first(), + v_last = + current_graph_view.local_vertex_partition_range_last()] __device__(auto v_dist_pred) { + auto v = thrust::get<0>(v_dist_pred); + if ((v < v_first) || (v >= v_last)) { + printf("%d out of range [%d %d)\n", + static_cast(v), + static_cast(v_first), + static_cast(v_last)); + } + auto dist = thrust::get<1>(v_dist_pred); + auto pred = thrust::get<2>(v_dist_pred); + auto v_offset = v - v_first; + if (distances[v_offset] < dist) { + updated[v_offset] = flag_t{true}; + distances[v_offset] = dist; + predecessors[v_offset] = pred; + } + }); + + int nr_of_updated_vertices = + thrust::count(handle.get_thrust_policy(), updated.begin(), updated.end(), flag_t{true}); + + if constexpr (graph_view_t::is_multi_gpu) { + nr_of_updated_vertices = host_scalar_allreduce( + handle.get_comms(), nr_of_updated_vertices, raft::comms::op_t::SUM, handle.get_stream()); + } + + if (nr_of_updated_vertices == 0) { break; } + + if (graph_view_t::is_multi_gpu) { + auto const comm_rank = handle.get_comms().get_rank(); + auto const comm_size = handle.get_comms().get_size(); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + auto edge_reduced_dst_keys_title = + std::string("edge_reduced_dst_keys_").append(std::to_string(comm_rank)); + raft::print_device_vector(edge_reduced_dst_keys_title.c_str(), + edge_reduced_dst_keys.begin(), + edge_reduced_dst_keys.size(), + std::cout); + + auto minimum_weights_title = + std::string("minimum_weights_").append(std::to_string(comm_rank)); + raft::print_device_vector( + minimum_weights_title.c_str(), minimum_weights.begin(), minimum_weights.size(), std::cout); + + auto closest_preds_title = std::string("closest_preds_").append(std::to_string(comm_rank)); + raft::print_device_vector( + closest_preds_title.c_str(), closest_preds.begin(), closest_preds.size(), std::cout); + } + } + + if (itr_cnt == current_graph_view.local_vertex_partition_range_size()) { + std::cout << "Detected -ve cycle.\n"; + } + + /// + + /* + vertex_t color_id = 0; + while (true) { + using flag_t = uint8_t; + rmm::device_uvector is_vertex_in_mis = rmm::device_uvector( + current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), is_vertex_in_mis.begin(), is_vertex_in_mis.end(), 0); + + if (current_graph_view.compute_number_of_edges(handle) == 0) { break; } + + cugraph::edge_src_property_t src_mis_flags(handle, current_graph_view); + cugraph::edge_dst_property_t dst_mis_flags(handle, current_graph_view); + + cugraph::update_edge_src_property( + handle, current_graph_view, is_vertex_in_mis.begin(), src_mis_flags); + + cugraph::update_edge_dst_property( + handle, current_graph_view, is_vertex_in_mis.begin(), dst_mis_flags); + + if (color_id % 2 == 0) { + cugraph::transform_e( + handle, + current_graph_view, + src_mis_flags.view(), + dst_mis_flags.view(), + cugraph::edge_dummy_property_t{}.view(), + [color_id] __device__( + auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, thrust::nullopt_t) { + return !((is_src_in_mis == uint8_t{true}) || (is_dst_in_mis == uint8_t{true})); + }, + edge_masks_odd.mutable_view()); + + if (current_graph_view.has_edge_mask()) current_graph_view.clear_edge_mask(); + cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_even); + current_graph_view.attach_edge_mask(edge_masks_odd.view()); + } else { + cugraph::transform_e( + handle, + current_graph_view, + src_mis_flags.view(), + dst_mis_flags.view(), + cugraph::edge_dummy_property_t{}.view(), + [color_id] __device__( + auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, thrust::nullopt_t) { + return !((is_src_in_mis == uint8_t{true}) || (is_dst_in_mis == uint8_t{true})); + }, + edge_masks_even.mutable_view()); + + if (current_graph_view.has_edge_mask()) current_graph_view.clear_edge_mask(); + cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_odd); + current_graph_view.attach_edge_mask(edge_masks_even.view()); + } + + color_id++; + } + */ +} +} // namespace detail + +template +void bellman_ford(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + vertex_t source, + vertex_t* predecessors, + weight_t* distances) +{ + detail::bellman_ford(handle, graph_view, edge_weight_view, source, predecessors, distances); +} + +} // namespace cugraph diff --git a/cpp/src/traversal/bellman_ford_mg.cu b/cpp/src/traversal/bellman_ford_mg.cu new file mode 100644 index 00000000000..328591728f4 --- /dev/null +++ b/cpp/src/traversal/bellman_ford_mg.cu @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "bellman_ford_impl.cuh" + +namespace cugraph { + +template void bellman_ford( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int32_t source, + int32_t* predecessors, + float* distances); + +template void bellman_ford( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int32_t source, + int32_t* predecessors, + double* distances); + +template void bellman_ford( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int32_t source, + int32_t* predecessors, + float* distances); + +template void bellman_ford( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int64_t source, + int64_t* predecessors, + float* distances); + +template void bellman_ford( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int32_t source, + int32_t* predecessors, + double* distances); + +template void bellman_ford( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int64_t source, + int64_t* predecessors, + double* distances); + +} // namespace cugraph diff --git a/cpp/src/traversal/bellman_ford_sg.cu b/cpp/src/traversal/bellman_ford_sg.cu new file mode 100644 index 00000000000..6a438913fd8 --- /dev/null +++ b/cpp/src/traversal/bellman_ford_sg.cu @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "bellman_ford_impl.cuh" + +namespace cugraph { + +template void bellman_ford( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int32_t source, + int32_t* predecessors, + float* distances); + +template void bellman_ford( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int32_t source, + int32_t* predecessors, + double* distances); + +template void bellman_ford( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int32_t source, + int32_t* predecessors, + float* distances); + +template void bellman_ford( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int64_t source, + int64_t* predecessors, + float* distances); + +template void bellman_ford( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int32_t source, + int32_t* predecessors, + double* distances); + +template void bellman_ford( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int64_t source, + int64_t* predecessors, + double* distances); + +} // namespace cugraph From ae361779d5b351e60c5ba301d0ffd236a9ada0f9 Mon Sep 17 00:00:00 2001 From: Naim Date: Tue, 26 Mar 2024 00:32:07 +0100 Subject: [PATCH 2/4] bellman_ford to detect negative cycle --- cpp/src/traversal/bellman_ford_impl.cuh | 73 +++++++++++++++---------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/cpp/src/traversal/bellman_ford_impl.cuh b/cpp/src/traversal/bellman_ford_impl.cuh index c7bf56b208f..ec144aa5011 100644 --- a/cpp/src/traversal/bellman_ford_impl.cuh +++ b/cpp/src/traversal/bellman_ford_impl.cuh @@ -104,8 +104,9 @@ void bellman_ford(raft::handle_t const& handle, // thrust::copy(handle.get_thrust_policy(), vertex_begin, vertex_end, local_vertices.begin()); - vertex_t itr_cnt = 0; - for (itr_cnt = 0; itr_cnt <= current_graph_view.number_of_vertices(); itr_cnt++) { + vertex_t itr_cnt = 0; + int nr_of_updated_vertices = 0; + while (itr_cnt < current_graph_view.number_of_vertices()) { if constexpr (graph_view_t::is_multi_gpu) { src_predecessor_cache = edge_src_property_t(handle, current_graph_view); @@ -199,8 +200,7 @@ void bellman_ford(raft::handle_t const& handle, static_cast(src_dist), static_cast(dst_dist)); - auto relax = (src_dist < invalid_distance) && - ((dst_dist == invalid_distance) || (dst_dist > (src_dist + wt))); + auto relax = (src_dist < invalid_distance) && (dst_dist > (src_dist + wt)); return relax ? thrust::make_tuple(src_dist + wt, src) : thrust::make_tuple(invalid_distance, invalid_vertex); @@ -261,6 +261,28 @@ void bellman_ford(raft::handle_t const& handle, handle.get_stream()); } + if (graph_view_t::is_multi_gpu) { + auto const comm_rank = handle.get_comms().get_rank(); + auto const comm_size = handle.get_comms().get_size(); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + auto edge_reduced_dst_keys_title = + std::string("edge_reduced_dst_keys_").append(std::to_string(comm_rank)); + raft::print_device_vector(edge_reduced_dst_keys_title.c_str(), + edge_reduced_dst_keys.begin(), + edge_reduced_dst_keys.size(), + std::cout); + + auto minimum_weights_title = + std::string("minimum_weights_").append(std::to_string(comm_rank)); + raft::print_device_vector( + minimum_weights_title.c_str(), minimum_weights.begin(), minimum_weights.size(), std::cout); + + auto closest_preds_title = std::string("closest_preds_").append(std::to_string(comm_rank)); + raft::print_device_vector( + closest_preds_title.c_str(), closest_preds.begin(), closest_preds.size(), std::cout); + } + using flag_t = uint8_t; rmm::device_uvector updated = rmm::device_uvector( current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); @@ -281,22 +303,29 @@ void bellman_ford(raft::handle_t const& handle, current_graph_view.local_vertex_partition_range_last()] __device__(auto v_dist_pred) { auto v = thrust::get<0>(v_dist_pred); if ((v < v_first) || (v >= v_last)) { - printf("%d out of range [%d %d)\n", + printf("%d > > > > > out of range [%d %d)\n", static_cast(v), static_cast(v_first), static_cast(v_last)); } + auto dist = thrust::get<1>(v_dist_pred); auto pred = thrust::get<2>(v_dist_pred); auto v_offset = v - v_first; - if (distances[v_offset] < dist) { + + printf(" vertex %d : [pred=%d dist = %f)\n", + static_cast(v), + static_cast(pred), + static_cast(dist)); + + if (pred != invalid_vertex) { updated[v_offset] = flag_t{true}; distances[v_offset] = dist; predecessors[v_offset] = pred; } }); - int nr_of_updated_vertices = + nr_of_updated_vertices = thrust::count(handle.get_thrust_policy(), updated.begin(), updated.end(), flag_t{true}); if constexpr (graph_view_t::is_multi_gpu) { @@ -304,32 +333,18 @@ void bellman_ford(raft::handle_t const& handle, handle.get_comms(), nr_of_updated_vertices, raft::comms::op_t::SUM, handle.get_stream()); } - if (nr_of_updated_vertices == 0) { break; } - - if (graph_view_t::is_multi_gpu) { - auto const comm_rank = handle.get_comms().get_rank(); - auto const comm_size = handle.get_comms().get_size(); + itr_cnt++; + std::cout << "itr_cnt: " << itr_cnt << std::endl; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto edge_reduced_dst_keys_title = - std::string("edge_reduced_dst_keys_").append(std::to_string(comm_rank)); - raft::print_device_vector(edge_reduced_dst_keys_title.c_str(), - edge_reduced_dst_keys.begin(), - edge_reduced_dst_keys.size(), - std::cout); - - auto minimum_weights_title = - std::string("minimum_weights_").append(std::to_string(comm_rank)); - raft::print_device_vector( - minimum_weights_title.c_str(), minimum_weights.begin(), minimum_weights.size(), std::cout); - - auto closest_preds_title = std::string("closest_preds_").append(std::to_string(comm_rank)); - raft::print_device_vector( - closest_preds_title.c_str(), closest_preds.begin(), closest_preds.size(), std::cout); + if (nr_of_updated_vertices == 0) { + std::cout << "No more updates\n"; + break; } } - if (itr_cnt == current_graph_view.local_vertex_partition_range_size()) { + std::cout << "itr_cnt (out of loop) : " << itr_cnt << std::endl; + + if ((itr_cnt == current_graph_view.number_of_vertices()) && (nr_of_updated_vertices > 0)) { std::cout << "Detected -ve cycle.\n"; } From 5691949a9332115761e0a42c6034c479034905e6 Mon Sep 17 00:00:00 2001 From: Naim Date: Sun, 31 Mar 2024 00:17:06 +0100 Subject: [PATCH 3/4] bellman_ford to detect negative cycle with queue --- cpp/src/traversal/bellman_ford_impl.cuh | 540 ++++++++++++------------ 1 file changed, 275 insertions(+), 265 deletions(-) diff --git a/cpp/src/traversal/bellman_ford_impl.cuh b/cpp/src/traversal/bellman_ford_impl.cuh index ec144aa5011..7edb29b27e9 100644 --- a/cpp/src/traversal/bellman_ford_impl.cuh +++ b/cpp/src/traversal/bellman_ford_impl.cuh @@ -19,7 +19,10 @@ #include "prims/reduce_op.cuh" #include "prims/transform_e.cuh" #include "prims/transform_reduce_e_by_src_dst_key.cuh" +#include "prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh" #include "prims/update_edge_src_dst_property.cuh" +#include "prims/update_v_frontier.cuh" +#include "prims/vertex_frontier.cuh" #include #include @@ -64,6 +67,8 @@ void bellman_ford(raft::handle_t const& handle, current_graph_view.attach_edge_mask(edge_masks_even.view()); + bool debug_flag = current_graph_view.number_of_vertices() <= 7; + // 2. initialize distances and predecessors auto constexpr invalid_distance = std::numeric_limits::max(); @@ -81,13 +86,30 @@ void bellman_ford(raft::handle_t const& handle, return thrust::make_tuple(distance, invalid_vertex); }); - edge_src_property_t src_predecessor_cache(handle); - edge_src_property_t src_distance_cache(handle); + // auto src_predecessor_cache = + // graph_view_t::is_multi_gpu + // ? edge_src_property_t(handle, current_graph_view) + // : edge_src_property_t(handle); + + auto src_distance_cache = + graph_view_t::is_multi_gpu + ? edge_src_property_t(handle, current_graph_view) + : edge_src_property_t(handle); + + // auto dst_predecessor_cache = + // graph_view_t::is_multi_gpu + // ? edge_dst_property_t(handle, current_graph_view) + // : edge_dst_property_t(handle); - edge_dst_property_t dst_predecessor_cache(handle); - edge_dst_property_t dst_distance_cache(handle); + // auto dst_distance_cache = + // graph_view_t::is_multi_gpu + // ? edge_dst_property_t(handle, current_graph_view) + // : edge_dst_property_t(handle); - edge_dst_property_t dst_key_cache(handle); + // auto dst_key_cache = graph_view_t::is_multi_gpu + // ? edge_dst_property_t(handle, + // current_graph_view) : edge_dst_property_t(handle); rmm::device_uvector local_vertices( current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); @@ -97,168 +119,181 @@ void bellman_ford(raft::handle_t const& handle, local_vertices.size(), current_graph_view.local_vertex_partition_range_first()); - // auto vertex_begin = - // thrust::make_counting_iterator(current_graph_view.local_vertex_partition_range_first()); - // auto vertex_end = - // thrust::make_counting_iterator(current_graph_view.local_vertex_partition_range_last()); + constexpr size_t bucket_idx_curr = 0; + constexpr size_t bucket_idx_next = 1; + constexpr size_t num_buckets = 2; - // thrust::copy(handle.get_thrust_policy(), vertex_begin, vertex_end, local_vertices.begin()); + vertex_frontier_t vertex_frontier(handle, + num_buckets); - vertex_t itr_cnt = 0; - int nr_of_updated_vertices = 0; - while (itr_cnt < current_graph_view.number_of_vertices()) { - if constexpr (graph_view_t::is_multi_gpu) { - src_predecessor_cache = - edge_src_property_t(handle, current_graph_view); - src_distance_cache = edge_src_property_t(handle, current_graph_view); - - dst_predecessor_cache = - edge_dst_property_t(handle, current_graph_view); - dst_distance_cache = edge_dst_property_t(handle, current_graph_view); + if (current_graph_view.in_local_vertex_partition_range_nocheck(source)) { + vertex_frontier.bucket(bucket_idx_curr).insert(source); + } - dst_key_cache = edge_dst_property_t(handle, current_graph_view); + rmm::device_uvector enqueue_counter( + current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); - update_edge_src_property(handle, current_graph_view, predecessors, src_predecessor_cache); - update_edge_src_property(handle, current_graph_view, distances, src_distance_cache); + thrust::fill( + handle.get_thrust_policy(), enqueue_counter.begin(), enqueue_counter.end(), vertex_t{0}); - update_edge_dst_property(handle, current_graph_view, predecessors, dst_predecessor_cache); - update_edge_dst_property(handle, current_graph_view, distances, dst_distance_cache); + vertex_t itr_cnt = 0; + int nr_of_updated_vertices = 0; - update_edge_dst_property(handle, current_graph_view, local_vertices.begin(), dst_key_cache); + vertex_t nr_vertices_n_plus_times = 0; + while (true) { + if constexpr (graph_view_t::is_multi_gpu) { + // update_edge_src_property(handle, + // current_graph_view, + // vertex_frontier.bucket(bucket_idx_curr).begin(), + // vertex_frontier.bucket(bucket_idx_curr).end(), + // predecessors, + // src_predecessor_cache); + update_edge_src_property(handle, + current_graph_view, + vertex_frontier.bucket(bucket_idx_curr).begin(), + vertex_frontier.bucket(bucket_idx_curr).end(), + distances, + src_distance_cache); + + // update_edge_dst_property(handle, + // current_graph_view, + // vertex_frontier.bucket(bucket_idx_curr).begin(), + // vertex_frontier.bucket(bucket_idx_curr).end(), + // predecessors, + // dst_predecessor_cache); + // update_edge_dst_property(handle, + // current_graph_view, + // vertex_frontier.bucket(bucket_idx_curr).begin(), + // vertex_frontier.bucket(bucket_idx_curr).end(), + // distances, + // dst_distance_cache); + + // update_edge_dst_property(handle, + // current_graph_view, + // vertex_frontier.bucket(bucket_idx_curr).begin(), + // vertex_frontier.bucket(bucket_idx_curr).end(), + // local_vertices.begin(), + // dst_key_cache); } + /* auto src_input_property_values = graph_view_t::is_multi_gpu - ? view_concat(src_predecessor_cache.view(), src_distance_cache.view()) - : view_concat(detail::edge_major_property_view_t(predecessors), - detail::edge_major_property_view_t(distances)); - - auto dst_input_property_values = - graph_view_t::is_multi_gpu - ? view_concat(dst_predecessor_cache.view(), dst_distance_cache.view()) - : view_concat( - detail::edge_minor_property_view_t(predecessors, - vertex_t{0}), - detail::edge_minor_property_view_t(distances, weight_t{0})); + // ? view_concat(src_predecessor_cache.view(), src_distance_cache.view()) + ? src_distance_cache.view() + // : view_concat(detail::edge_major_property_view_t(predecessors), + // detail::edge_major_property_view_t(distances)); + : detail::edge_major_property_view_t(distances); + + + auto dst_input_property_values = graph_view_t::is_multi_gpu + // ? view_concat(dst_predecessor_cache.view(), dst_distance_cache.view()) + ? dst_distance_cache.view() + // : view_concat( + // detail::edge_minor_property_view_t(predecessors, + // vertex_t{0}), + // detail::edge_minor_property_view_t(distances, + // weight_t{0})); + + // : view_concat( + detail::edge_minor_property_view_t(distances, weight_t{0}); + */ + auto [new_frontier_vertex_buffer, distance_predecessor_buffer] = + transform_reduce_v_frontier_outgoing_e_by_dst( + handle, + current_graph_view, + vertex_frontier.bucket(bucket_idx_curr), + graph_view_t::is_multi_gpu + ? src_distance_cache.view() + : detail::edge_major_property_view_t(distances), + edge_dst_dummy_property_t{}.view(), + *edge_weight_view, + [debug_flag, + distances, + v_first = current_graph_view.local_vertex_partition_range_first(), + v_last = current_graph_view + .local_vertex_partition_range_last()] __device__(auto src, + auto dst, + auto src_dist, + thrust::nullopt_t, + // auto dst_dist, + // thrust::tuple src_pred_dist, + // thrust::tuple dst_pred_dist, + auto wt) { + if (dst < v_first || dst >= v_last) { + printf("\n ****** dst = %d is not in this VP \n", static_cast(dst)); + } + // auto src_pred = thrust::get<0>(src_pred_dist); + // auto src_dist = thrust::get<1>(src_pred_dist); + + // auto dst_pred = thrust::get<0>(dst_pred_dist); + // auto dst_dist = thrust::get<1>(dst_pred_dist); + + auto dst_dist = distances[dst - v_first]; + + /*src_pred = %d dst_pred = %d,*/ + if (debug_flag) + printf("src = %d dst = %d wt = %f src_dist = %f dst_dist = %f\n", + static_cast(src), + static_cast(dst), + static_cast(wt), + // static_cast(src_pred), + // static_cast(dst_pred), + static_cast(src_dist), + static_cast(dst_dist)); + + auto relax = (dst_dist > (src_dist + wt)); + + return relax ? thrust::optional>{thrust::make_tuple( + src_dist + wt, src)} + : thrust::nullopt; + }, + reduce_op::minimum>(), + true); if (graph_view_t::is_multi_gpu) { auto const comm_rank = handle.get_comms().get_rank(); auto const comm_size = handle.get_comms().get_size(); RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto local_vertices_title = std::string("local_vertices_").append(std::to_string(comm_rank)); - raft::print_device_vector( - local_vertices_title.c_str(), local_vertices.begin(), local_vertices.size(), std::cout); + auto new_frontier_vertex_buffer_title = std::string("nvb_").append(std::to_string(comm_rank)); + if (debug_flag) + raft::print_device_vector(new_frontier_vertex_buffer_title.c_str(), + new_frontier_vertex_buffer.begin(), + new_frontier_vertex_buffer.size(), + std::cout); + + auto key_buffer = thrust::get<0>( + cugraph::get_dataframe_buffer_cbegin(distance_predecessor_buffer).get_iterator_tuple()); + auto value_buffer = thrust::get<1>( + cugraph::get_dataframe_buffer_cbegin(distance_predecessor_buffer).get_iterator_tuple()); RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto distances_title = std::string("distances_").append(std::to_string(comm_rank)); - raft::print_device_vector(distances_title.c_str(), - distances, - current_graph_view.local_vertex_partition_range_size(), - std::cout); + auto key_buffer_title = std::string("key_buffer_").append(std::to_string(comm_rank)); + if (debug_flag) + raft::print_device_vector( + key_buffer_title.c_str(), key_buffer, new_frontier_vertex_buffer.size(), std::cout); RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto predecessors_title = std::string("predecessors_").append(std::to_string(comm_rank)); - raft::print_device_vector(predecessors_title.c_str(), - predecessors, - current_graph_view.local_vertex_partition_range_size(), - std::cout); + auto value_buffer_title = std::string("value_buffer_").append(std::to_string(comm_rank)); + if (debug_flag) + raft::print_device_vector( + value_buffer_title.c_str(), value_buffer, new_frontier_vertex_buffer.size(), std::cout); } - rmm::device_uvector edge_reduced_dst_keys(0, handle.get_stream()); - rmm::device_uvector minimum_weights(0, handle.get_stream()); - rmm::device_uvector closest_preds(0, handle.get_stream()); + nr_of_updated_vertices = new_frontier_vertex_buffer.size(); - std::forward_as_tuple(edge_reduced_dst_keys, std::tie(minimum_weights, closest_preds)) = - cugraph::transform_reduce_e_by_dst_key( - handle, - current_graph_view, - src_input_property_values, - dst_input_property_values, - *edge_weight_view, - graph_view_t::is_multi_gpu ? dst_key_cache.view() - : detail::edge_minor_property_view_t( - local_vertices.begin(), vertex_t{0}), - [] __device__(auto src, - auto dst, - thrust::tuple src_pred_dist, - thrust::tuple dst_pred_dist, - auto wt) { - auto src_pred = thrust::get<0>(src_pred_dist); - auto src_dist = thrust::get<1>(src_pred_dist); - - auto dst_pred = thrust::get<0>(dst_pred_dist); - auto dst_dist = thrust::get<1>(dst_pred_dist); - - printf( - "src = %d dst = %d wt = %f src_pred = %d dst_pred = %d, src_dist = %f dst_dist = %f\n", - static_cast(src), - static_cast(dst), - static_cast(wt), - static_cast(src_pred), - static_cast(dst_pred), - static_cast(src_dist), - static_cast(dst_dist)); - - auto relax = (src_dist < invalid_distance) && (dst_dist > (src_dist + wt)); - - return relax ? thrust::make_tuple(src_dist + wt, src) - : thrust::make_tuple(invalid_distance, invalid_vertex); - }, - thrust::make_tuple(invalid_distance, invalid_vertex), - reduce_op::minimum>{}, - true); + if (graph_view_t::is_multi_gpu) { + nr_of_updated_vertices = host_scalar_allreduce( + handle.get_comms(), nr_of_updated_vertices, raft::comms::op_t::SUM, handle.get_stream()); + } - if constexpr (graph_view_t::is_multi_gpu) { - auto vertex_partition_range_lasts = current_graph_view.vertex_partition_range_lasts(); - - rmm::device_uvector d_vertex_partition_range_lasts( - vertex_partition_range_lasts.size(), handle.get_stream()); - - raft::update_device(d_vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.size(), - handle.get_stream()); - - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_size = major_comm.get_size(); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_size = minor_comm.get_size(); - - auto func = cugraph::detail::compute_gpu_id_from_int_vertex_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - major_comm_size, - minor_comm_size}; - - rmm::device_uvector d_tx_value_counts(0, handle.get_stream()); - - auto triplet_first = thrust::make_zip_iterator( - edge_reduced_dst_keys.begin(), minimum_weights.begin(), closest_preds.begin()); - - d_tx_value_counts = cugraph::groupby_and_count( - triplet_first, - triplet_first + edge_reduced_dst_keys.size(), - [func] __device__(auto val) { return func(thrust::get<0>(val)); }, - handle.get_comms().get_size(), - std::numeric_limits::max(), - handle.get_stream()); - - std::vector h_tx_value_counts(d_tx_value_counts.size()); - raft::update_host(h_tx_value_counts.data(), - d_tx_value_counts.data(), - d_tx_value_counts.size(), - handle.get_stream()); - handle.sync_stream(); - - std::forward_as_tuple(std::tie(edge_reduced_dst_keys, minimum_weights, closest_preds), - std::ignore) = - shuffle_values( - handle.get_comms(), - thrust::make_zip_iterator( - edge_reduced_dst_keys.begin(), minimum_weights.begin(), closest_preds.begin()), - h_tx_value_counts, - handle.get_stream()); + if (nr_of_updated_vertices == 0) { + std::cout << "no update break\n"; + break; } if (graph_view_t::is_multi_gpu) { @@ -266,146 +301,120 @@ void bellman_ford(raft::handle_t const& handle, auto const comm_size = handle.get_comms().get_size(); RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto edge_reduced_dst_keys_title = - std::string("edge_reduced_dst_keys_").append(std::to_string(comm_rank)); - raft::print_device_vector(edge_reduced_dst_keys_title.c_str(), - edge_reduced_dst_keys.begin(), - edge_reduced_dst_keys.size(), - std::cout); - - auto minimum_weights_title = - std::string("minimum_weights_").append(std::to_string(comm_rank)); - raft::print_device_vector( - minimum_weights_title.c_str(), minimum_weights.begin(), minimum_weights.size(), std::cout); - - auto closest_preds_title = std::string("closest_preds_").append(std::to_string(comm_rank)); - raft::print_device_vector( - closest_preds_title.c_str(), closest_preds.begin(), closest_preds.size(), std::cout); + auto enqueue_counter_title = std::string("qc_b_").append(std::to_string(comm_rank)); + if (debug_flag) + raft::print_device_vector(enqueue_counter_title.c_str(), + enqueue_counter.begin(), + enqueue_counter.size(), + std::cout); } - using flag_t = uint8_t; - rmm::device_uvector updated = rmm::device_uvector( - current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); + thrust::for_each(handle.get_thrust_policy(), + new_frontier_vertex_buffer.begin(), + new_frontier_vertex_buffer.end(), + [v_first = current_graph_view.local_vertex_partition_range_first(), + v_last = current_graph_view.local_vertex_partition_range_last(), + enqueue_counter = enqueue_counter.begin()] __device__(vertex_t v) { + if (v < v_first || v >= v_last) { + printf("\n enque conter: *** v = %d is not in this VP \n", + static_cast(v)); + } + enqueue_counter[v - v_first] += 1; + }); + + if (graph_view_t::is_multi_gpu) { + auto const comm_rank = handle.get_comms().get_rank(); + auto const comm_size = handle.get_comms().get_size(); - thrust::fill(handle.get_thrust_policy(), updated.begin(), updated.end(), flag_t{false}); + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + auto enqueue_counter_title = std::string("qc_a_").append(std::to_string(comm_rank)); + if (debug_flag) + raft::print_device_vector(enqueue_counter_title.c_str(), + enqueue_counter.begin(), + enqueue_counter.size(), + std::cout); + } - thrust::for_each( + nr_vertices_n_plus_times = thrust::count_if( handle.get_thrust_policy(), - thrust::make_zip_iterator(thrust::make_tuple( - edge_reduced_dst_keys.begin(), minimum_weights.begin(), closest_preds.begin())), - thrust::make_zip_iterator(thrust::make_tuple( - edge_reduced_dst_keys.end(), minimum_weights.end(), closest_preds.end())), - [distances, - predecessors, - updated = updated.begin(), - v_first = current_graph_view.local_vertex_partition_range_first(), - v_last = - current_graph_view.local_vertex_partition_range_last()] __device__(auto v_dist_pred) { - auto v = thrust::get<0>(v_dist_pred); - if ((v < v_first) || (v >= v_last)) { - printf("%d > > > > > out of range [%d %d)\n", - static_cast(v), - static_cast(v_first), - static_cast(v_last)); - } - - auto dist = thrust::get<1>(v_dist_pred); - auto pred = thrust::get<2>(v_dist_pred); - auto v_offset = v - v_first; - - printf(" vertex %d : [pred=%d dist = %f)\n", - static_cast(v), - static_cast(pred), - static_cast(dist)); - - if (pred != invalid_vertex) { - updated[v_offset] = flag_t{true}; - distances[v_offset] = dist; - predecessors[v_offset] = pred; - } - }); - - nr_of_updated_vertices = - thrust::count(handle.get_thrust_policy(), updated.begin(), updated.end(), flag_t{true}); + enqueue_counter.begin(), + enqueue_counter.end(), + [n = current_graph_view.number_of_vertices()] __device__(auto flag) { return flag >= n; }); - if constexpr (graph_view_t::is_multi_gpu) { - nr_of_updated_vertices = host_scalar_allreduce( - handle.get_comms(), nr_of_updated_vertices, raft::comms::op_t::SUM, handle.get_stream()); + if (graph_view_t::is_multi_gpu) { + nr_vertices_n_plus_times = host_scalar_allreduce( + handle.get_comms(), nr_vertices_n_plus_times, raft::comms::op_t::SUM, handle.get_stream()); } - itr_cnt++; - std::cout << "itr_cnt: " << itr_cnt << std::endl; - - if (nr_of_updated_vertices == 0) { - std::cout << "No more updates\n"; + if (nr_vertices_n_plus_times > 0) { + std::cout << "enque n+ break\n"; break; } - } - std::cout << "itr_cnt (out of loop) : " << itr_cnt << std::endl; - - if ((itr_cnt == current_graph_view.number_of_vertices()) && (nr_of_updated_vertices > 0)) { - std::cout << "Detected -ve cycle.\n"; - } - - /// - - /* - vertex_t color_id = 0; - while (true) { - using flag_t = uint8_t; - rmm::device_uvector is_vertex_in_mis = rmm::device_uvector( - current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); - thrust::fill(handle.get_thrust_policy(), is_vertex_in_mis.begin(), is_vertex_in_mis.end(), 0); - - if (current_graph_view.compute_number_of_edges(handle) == 0) { break; } - - cugraph::edge_src_property_t src_mis_flags(handle, current_graph_view); - cugraph::edge_dst_property_t dst_mis_flags(handle, current_graph_view); - - cugraph::update_edge_src_property( - handle, current_graph_view, is_vertex_in_mis.begin(), src_mis_flags); + update_v_frontier(handle, + current_graph_view, + std::move(new_frontier_vertex_buffer), + std::move(distance_predecessor_buffer), + vertex_frontier, + std::vector{bucket_idx_next}, + distances, + thrust::make_zip_iterator(thrust::make_tuple(distances, predecessors)), + [] __device__(auto v, auto v_val, auto pushed_val) { + auto new_dist = thrust::get<0>(pushed_val); + auto update = (new_dist < v_val); + return thrust::make_tuple( + update ? thrust::optional{bucket_idx_next} : thrust::nullopt, + update ? thrust::optional>{pushed_val} + : thrust::nullopt); + }); + + vertex_frontier.bucket(bucket_idx_curr).clear(); + vertex_frontier.bucket(bucket_idx_curr).shrink_to_fit(); + + if (vertex_frontier.bucket(bucket_idx_next).aggregate_size() > 0) { + vertex_frontier.swap_buckets(bucket_idx_curr, bucket_idx_next); + } else { + std::cout << "swap break\n"; + break; + } - cugraph::update_edge_dst_property( - handle, current_graph_view, is_vertex_in_mis.begin(), dst_mis_flags); + if (graph_view_t::is_multi_gpu) { + auto const comm_rank = handle.get_comms().get_rank(); + auto const comm_size = handle.get_comms().get_size(); - if (color_id % 2 == 0) { - cugraph::transform_e( - handle, - current_graph_view, - src_mis_flags.view(), - dst_mis_flags.view(), - cugraph::edge_dummy_property_t{}.view(), - [color_id] __device__( - auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, thrust::nullopt_t) { - return !((is_src_in_mis == uint8_t{true}) || (is_dst_in_mis == uint8_t{true})); - }, - edge_masks_odd.mutable_view()); + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + auto local_vertices_title = std::string("local_vertices_").append(std::to_string(comm_rank)); + if (debug_flag) + raft::print_device_vector( + local_vertices_title.c_str(), local_vertices.begin(), local_vertices.size(), std::cout); - if (current_graph_view.has_edge_mask()) current_graph_view.clear_edge_mask(); - cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_even); - current_graph_view.attach_edge_mask(edge_masks_odd.view()); - } else { - cugraph::transform_e( - handle, - current_graph_view, - src_mis_flags.view(), - dst_mis_flags.view(), - cugraph::edge_dummy_property_t{}.view(), - [color_id] __device__( - auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, thrust::nullopt_t) { - return !((is_src_in_mis == uint8_t{true}) || (is_dst_in_mis == uint8_t{true})); - }, - edge_masks_even.mutable_view()); + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + auto distances_title = std::string("distances_").append(std::to_string(comm_rank)); + if (debug_flag) + raft::print_device_vector(distances_title.c_str(), + distances, + current_graph_view.local_vertex_partition_range_size(), + std::cout); - if (current_graph_view.has_edge_mask()) current_graph_view.clear_edge_mask(); - cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_odd); - current_graph_view.attach_edge_mask(edge_masks_even.view()); + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + auto predecessors_title = std::string("predecessors_").append(std::to_string(comm_rank)); + if (debug_flag) + raft::print_device_vector(predecessors_title.c_str(), + predecessors, + current_graph_view.local_vertex_partition_range_size(), + std::cout); } - color_id++; + itr_cnt++; + std::cout << "itr_cnt: " << itr_cnt << std::endl; } - */ + + std::cout << "itr_cnt (out of loop) : " << itr_cnt << std::endl; + + if (nr_vertices_n_plus_times > 0) { std::cout << "Found -ve cycle " << std::endl; } + // if ((itr_cnt == current_graph_view.number_of_vertices()) && (nr_of_updated_vertices > 0)) { + // std::cout << "Detected -ve cycle.\n"; + // } } } // namespace detail @@ -418,6 +427,7 @@ void bellman_ford(raft::handle_t const& handle, weight_t* distances) { detail::bellman_ford(handle, graph_view, edge_weight_view, source, predecessors, distances); + std::cout << " returning from cugraph::bellman\n"; } } // namespace cugraph From 9182c872c193094622a894e89900d69b0f895c2d Mon Sep 17 00:00:00 2001 From: Naim Date: Mon, 1 Apr 2024 01:04:21 +0200 Subject: [PATCH 4/4] Compute shortest paths from a source vertex using Bellman-Ford algorithm --- cpp/include/cugraph/algorithms.hpp | 36 ++++ cpp/src/traversal/bellman_ford_impl.cuh | 275 ++++-------------------- cpp/src/traversal/bellman_ford_mg.cu | 88 ++++---- cpp/src/traversal/bellman_ford_sg.cu | 88 ++++---- 4 files changed, 156 insertions(+), 331 deletions(-) diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 1471d340cec..655c05d46ad 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -1205,6 +1205,42 @@ void sssp(raft::handle_t const& handle, weight_t cutoff = std::numeric_limits::max(), bool do_expensive_check = false); +/** + * @brief Run Bellman-Ford algorithm to compute the minimum distances (and predecessors) from + * the source vertex. + * + * This function computes the distances (minimum edge weight sums) from the source vertex. If @p + * predecessors is not `nullptr`, this function calculates the predecessor of each vertex in the + * shortest-path as well. Bellman-Ford algorithm works for negative edge weights as well. If the + * input graph has negative cycle(s), the algorithm return false. + * + * @throws cugraph::logic_error on erroneous input arguments. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param[in] handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, + * and handles to various CUDA libraries) to run graph algorithms. + * @param[in] graph_view Graph view object. + * @param[in] edge_weight_view View object holding edge weights for @p graph_view. + * @param[in] source_vertex Source vertex to start single-source shortest-path. + * In a multi-gpu context the source vertex should be local to this GPU. + * @param[out] distances Pointer to the output distance array. + * @param[out] predecessors Pointer to the output predecessor array or `nullptr`. + * @return True if there is no negative cycle in input graph pointed by @p graph_view, and in + * that case @p distances and @p predecessors contain valid results. + * False otherwise, and in that case @p distances and @p predecessors contain invalid values. + */ +template +bool bellman_ford(raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view, + vertex_t source_vertex, + vertex_t* predecessors, + weight_t* distances); + /* * @brief Compute the shortest distances from the given origins to all the given destinations. * diff --git a/cpp/src/traversal/bellman_ford_impl.cuh b/cpp/src/traversal/bellman_ford_impl.cuh index 7edb29b27e9..ac86e115d09 100644 --- a/cpp/src/traversal/bellman_ford_impl.cuh +++ b/cpp/src/traversal/bellman_ford_impl.cuh @@ -37,9 +37,9 @@ namespace cugraph { namespace detail { template -void bellman_ford(raft::handle_t const& handle, +bool bellman_ford(raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, - std::optional> edge_weight_view, + edge_property_view_t edge_weight_view, vertex_t source, vertex_t* predecessors, weight_t* distances) @@ -67,8 +67,6 @@ void bellman_ford(raft::handle_t const& handle, current_graph_view.attach_edge_mask(edge_masks_even.view()); - bool debug_flag = current_graph_view.number_of_vertices() <= 7; - // 2. initialize distances and predecessors auto constexpr invalid_distance = std::numeric_limits::max(); @@ -86,31 +84,11 @@ void bellman_ford(raft::handle_t const& handle, return thrust::make_tuple(distance, invalid_vertex); }); - // auto src_predecessor_cache = - // graph_view_t::is_multi_gpu - // ? edge_src_property_t(handle, current_graph_view) - // : edge_src_property_t(handle); - auto src_distance_cache = graph_view_t::is_multi_gpu ? edge_src_property_t(handle, current_graph_view) : edge_src_property_t(handle); - // auto dst_predecessor_cache = - // graph_view_t::is_multi_gpu - // ? edge_dst_property_t(handle, current_graph_view) - // : edge_dst_property_t(handle); - - // auto dst_distance_cache = - // graph_view_t::is_multi_gpu - // ? edge_dst_property_t(handle, current_graph_view) - // : edge_dst_property_t(handle); - - // auto dst_key_cache = graph_view_t::is_multi_gpu - // ? edge_dst_property_t(handle, - // current_graph_view) : edge_dst_property_t(handle); - rmm::device_uvector local_vertices( current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); @@ -136,71 +114,19 @@ void bellman_ford(raft::handle_t const& handle, thrust::fill( handle.get_thrust_policy(), enqueue_counter.begin(), enqueue_counter.end(), vertex_t{0}); - vertex_t itr_cnt = 0; - int nr_of_updated_vertices = 0; - - vertex_t nr_vertices_n_plus_times = 0; + vertex_t nr_times_in_queue = 0; while (true) { if constexpr (graph_view_t::is_multi_gpu) { - // update_edge_src_property(handle, - // current_graph_view, - // vertex_frontier.bucket(bucket_idx_curr).begin(), - // vertex_frontier.bucket(bucket_idx_curr).end(), - // predecessors, - // src_predecessor_cache); - update_edge_src_property(handle, - current_graph_view, - vertex_frontier.bucket(bucket_idx_curr).begin(), - vertex_frontier.bucket(bucket_idx_curr).end(), - distances, - src_distance_cache); - - // update_edge_dst_property(handle, - // current_graph_view, - // vertex_frontier.bucket(bucket_idx_curr).begin(), - // vertex_frontier.bucket(bucket_idx_curr).end(), - // predecessors, - // dst_predecessor_cache); - // update_edge_dst_property(handle, - // current_graph_view, - // vertex_frontier.bucket(bucket_idx_curr).begin(), - // vertex_frontier.bucket(bucket_idx_curr).end(), - // distances, - // dst_distance_cache); - - // update_edge_dst_property(handle, - // current_graph_view, - // vertex_frontier.bucket(bucket_idx_curr).begin(), - // vertex_frontier.bucket(bucket_idx_curr).end(), - // local_vertices.begin(), - // dst_key_cache); + cugraph::update_edge_src_property(handle, + current_graph_view, + vertex_frontier.bucket(bucket_idx_curr).begin(), + vertex_frontier.bucket(bucket_idx_curr).end(), + distances, + src_distance_cache); } - /* - auto src_input_property_values = - graph_view_t::is_multi_gpu - // ? view_concat(src_predecessor_cache.view(), src_distance_cache.view()) - ? src_distance_cache.view() - // : view_concat(detail::edge_major_property_view_t(predecessors), - // detail::edge_major_property_view_t(distances)); - : detail::edge_major_property_view_t(distances); - - - auto dst_input_property_values = graph_view_t::is_multi_gpu - // ? view_concat(dst_predecessor_cache.view(), dst_distance_cache.view()) - ? dst_distance_cache.view() - // : view_concat( - // detail::edge_minor_property_view_t(predecessors, - // vertex_t{0}), - // detail::edge_minor_property_view_t(distances, - // weight_t{0})); - - // : view_concat( - detail::edge_minor_property_view_t(distances, weight_t{0}); - */ auto [new_frontier_vertex_buffer, distance_predecessor_buffer] = - transform_reduce_v_frontier_outgoing_e_by_dst( + cugraph::transform_reduce_v_frontier_outgoing_e_by_dst( handle, current_graph_view, vertex_frontier.bucket(bucket_idx_curr), @@ -208,44 +134,19 @@ void bellman_ford(raft::handle_t const& handle, ? src_distance_cache.view() : detail::edge_major_property_view_t(distances), edge_dst_dummy_property_t{}.view(), - *edge_weight_view, - [debug_flag, - distances, + edge_weight_view, + [distances, v_first = current_graph_view.local_vertex_partition_range_first(), - v_last = current_graph_view - .local_vertex_partition_range_last()] __device__(auto src, - auto dst, - auto src_dist, - thrust::nullopt_t, - // auto dst_dist, - // thrust::tuple src_pred_dist, - // thrust::tuple dst_pred_dist, - auto wt) { - if (dst < v_first || dst >= v_last) { - printf("\n ****** dst = %d is not in this VP \n", static_cast(dst)); - } - // auto src_pred = thrust::get<0>(src_pred_dist); - // auto src_dist = thrust::get<1>(src_pred_dist); - - // auto dst_pred = thrust::get<0>(dst_pred_dist); - // auto dst_dist = thrust::get<1>(dst_pred_dist); + v_last = + current_graph_view.local_vertex_partition_range_last()] __device__(auto src, + auto dst, + auto src_dist, + thrust::nullopt_t, + auto wt) { + assert(dst < v_first || dst >= v_last); auto dst_dist = distances[dst - v_first]; - - /*src_pred = %d dst_pred = %d,*/ - if (debug_flag) - printf("src = %d dst = %d wt = %f src_dist = %f dst_dist = %f\n", - static_cast(src), - static_cast(dst), - static_cast(wt), - // static_cast(src_pred), - // static_cast(dst_pred), - static_cast(src_dist), - static_cast(dst_dist)); - - auto relax = (dst_dist > (src_dist + wt)); + auto relax = (dst_dist > (src_dist + wt)); return relax ? thrust::optional>{thrust::make_tuple( src_dist + wt, src)} @@ -253,61 +154,14 @@ void bellman_ford(raft::handle_t const& handle, }, reduce_op::minimum>(), true); - - if (graph_view_t::is_multi_gpu) { - auto const comm_rank = handle.get_comms().get_rank(); - auto const comm_size = handle.get_comms().get_size(); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto new_frontier_vertex_buffer_title = std::string("nvb_").append(std::to_string(comm_rank)); - if (debug_flag) - raft::print_device_vector(new_frontier_vertex_buffer_title.c_str(), - new_frontier_vertex_buffer.begin(), - new_frontier_vertex_buffer.size(), - std::cout); - - auto key_buffer = thrust::get<0>( - cugraph::get_dataframe_buffer_cbegin(distance_predecessor_buffer).get_iterator_tuple()); - auto value_buffer = thrust::get<1>( - cugraph::get_dataframe_buffer_cbegin(distance_predecessor_buffer).get_iterator_tuple()); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto key_buffer_title = std::string("key_buffer_").append(std::to_string(comm_rank)); - if (debug_flag) - raft::print_device_vector( - key_buffer_title.c_str(), key_buffer, new_frontier_vertex_buffer.size(), std::cout); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto value_buffer_title = std::string("value_buffer_").append(std::to_string(comm_rank)); - if (debug_flag) - raft::print_device_vector( - value_buffer_title.c_str(), value_buffer, new_frontier_vertex_buffer.size(), std::cout); - } - - nr_of_updated_vertices = new_frontier_vertex_buffer.size(); + size_t nr_of_updated_vertices = new_frontier_vertex_buffer.size(); if (graph_view_t::is_multi_gpu) { nr_of_updated_vertices = host_scalar_allreduce( handle.get_comms(), nr_of_updated_vertices, raft::comms::op_t::SUM, handle.get_stream()); } - if (nr_of_updated_vertices == 0) { - std::cout << "no update break\n"; - break; - } - - if (graph_view_t::is_multi_gpu) { - auto const comm_rank = handle.get_comms().get_rank(); - auto const comm_size = handle.get_comms().get_size(); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto enqueue_counter_title = std::string("qc_b_").append(std::to_string(comm_rank)); - if (debug_flag) - raft::print_device_vector(enqueue_counter_title.c_str(), - enqueue_counter.begin(), - enqueue_counter.size(), - std::cout); - } + if (nr_of_updated_vertices == 0) { break; } thrust::for_each(handle.get_thrust_policy(), new_frontier_vertex_buffer.begin(), @@ -315,41 +169,23 @@ void bellman_ford(raft::handle_t const& handle, [v_first = current_graph_view.local_vertex_partition_range_first(), v_last = current_graph_view.local_vertex_partition_range_last(), enqueue_counter = enqueue_counter.begin()] __device__(vertex_t v) { - if (v < v_first || v >= v_last) { - printf("\n enque conter: *** v = %d is not in this VP \n", - static_cast(v)); - } + assert(v < v_first || v >= v_last); enqueue_counter[v - v_first] += 1; }); - if (graph_view_t::is_multi_gpu) { - auto const comm_rank = handle.get_comms().get_rank(); - auto const comm_size = handle.get_comms().get_size(); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto enqueue_counter_title = std::string("qc_a_").append(std::to_string(comm_rank)); - if (debug_flag) - raft::print_device_vector(enqueue_counter_title.c_str(), - enqueue_counter.begin(), - enqueue_counter.size(), - std::cout); - } - - nr_vertices_n_plus_times = thrust::count_if( - handle.get_thrust_policy(), - enqueue_counter.begin(), - enqueue_counter.end(), - [n = current_graph_view.number_of_vertices()] __device__(auto flag) { return flag >= n; }); + nr_times_in_queue = + thrust::count_if(handle.get_thrust_policy(), + enqueue_counter.begin(), + enqueue_counter.end(), + [nr_vertices = current_graph_view.number_of_vertices()] __device__( + auto freq_v) { return freq_v >= nr_vertices; }); if (graph_view_t::is_multi_gpu) { - nr_vertices_n_plus_times = host_scalar_allreduce( - handle.get_comms(), nr_vertices_n_plus_times, raft::comms::op_t::SUM, handle.get_stream()); + nr_times_in_queue = host_scalar_allreduce( + handle.get_comms(), nr_times_in_queue, raft::comms::op_t::SUM, handle.get_stream()); } - if (nr_vertices_n_plus_times > 0) { - std::cout << "enque n+ break\n"; - break; - } + if (nr_times_in_queue > 0) { break; } update_v_frontier(handle, current_graph_view, @@ -374,60 +210,25 @@ void bellman_ford(raft::handle_t const& handle, if (vertex_frontier.bucket(bucket_idx_next).aggregate_size() > 0) { vertex_frontier.swap_buckets(bucket_idx_curr, bucket_idx_next); } else { - std::cout << "swap break\n"; break; } - - if (graph_view_t::is_multi_gpu) { - auto const comm_rank = handle.get_comms().get_rank(); - auto const comm_size = handle.get_comms().get_size(); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto local_vertices_title = std::string("local_vertices_").append(std::to_string(comm_rank)); - if (debug_flag) - raft::print_device_vector( - local_vertices_title.c_str(), local_vertices.begin(), local_vertices.size(), std::cout); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto distances_title = std::string("distances_").append(std::to_string(comm_rank)); - if (debug_flag) - raft::print_device_vector(distances_title.c_str(), - distances, - current_graph_view.local_vertex_partition_range_size(), - std::cout); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto predecessors_title = std::string("predecessors_").append(std::to_string(comm_rank)); - if (debug_flag) - raft::print_device_vector(predecessors_title.c_str(), - predecessors, - current_graph_view.local_vertex_partition_range_size(), - std::cout); - } - - itr_cnt++; - std::cout << "itr_cnt: " << itr_cnt << std::endl; } - std::cout << "itr_cnt (out of loop) : " << itr_cnt << std::endl; - - if (nr_vertices_n_plus_times > 0) { std::cout << "Found -ve cycle " << std::endl; } - // if ((itr_cnt == current_graph_view.number_of_vertices()) && (nr_of_updated_vertices > 0)) { - // std::cout << "Detected -ve cycle.\n"; - // } + if (nr_times_in_queue > 0) { return false; } + return true; } } // namespace detail template -void bellman_ford(raft::handle_t const& handle, +bool bellman_ford(raft::handle_t const& handle, graph_view_t const& graph_view, - std::optional> edge_weight_view, + edge_property_view_t edge_weight_view, vertex_t source, vertex_t* predecessors, weight_t* distances) { - detail::bellman_ford(handle, graph_view, edge_weight_view, source, predecessors, distances); - std::cout << " returning from cugraph::bellman\n"; + return detail::bellman_ford( + handle, graph_view, edge_weight_view, source, predecessors, distances); } } // namespace cugraph diff --git a/cpp/src/traversal/bellman_ford_mg.cu b/cpp/src/traversal/bellman_ford_mg.cu index 328591728f4..5017e5d59ed 100644 --- a/cpp/src/traversal/bellman_ford_mg.cu +++ b/cpp/src/traversal/bellman_ford_mg.cu @@ -17,52 +17,46 @@ namespace cugraph { -template void bellman_ford( - raft::handle_t const& handle, - graph_view_t const& graph_view, - std::optional> edge_weight_view, - int32_t source, - int32_t* predecessors, - float* distances); - -template void bellman_ford( - raft::handle_t const& handle, - graph_view_t const& graph_view, - std::optional> edge_weight_view, - int32_t source, - int32_t* predecessors, - double* distances); - -template void bellman_ford( - raft::handle_t const& handle, - graph_view_t const& graph_view, - std::optional> edge_weight_view, - int32_t source, - int32_t* predecessors, - float* distances); - -template void bellman_ford( - raft::handle_t const& handle, - graph_view_t const& graph_view, - std::optional> edge_weight_view, - int64_t source, - int64_t* predecessors, - float* distances); - -template void bellman_ford( - raft::handle_t const& handle, - graph_view_t const& graph_view, - std::optional> edge_weight_view, - int32_t source, - int32_t* predecessors, - double* distances); - -template void bellman_ford( - raft::handle_t const& handle, - graph_view_t const& graph_view, - std::optional> edge_weight_view, - int64_t source, - int64_t* predecessors, - double* distances); +template bool bellman_ford(raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view, + int32_t source, + int32_t* predecessors, + float* distances); + +template bool bellman_ford(raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view, + int32_t source, + int32_t* predecessors, + double* distances); + +template bool bellman_ford(raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view, + int32_t source, + int32_t* predecessors, + float* distances); + +template bool bellman_ford(raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view, + int64_t source, + int64_t* predecessors, + float* distances); + +template bool bellman_ford(raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view, + int32_t source, + int32_t* predecessors, + double* distances); + +template bool bellman_ford(raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view, + int64_t source, + int64_t* predecessors, + double* distances); } // namespace cugraph diff --git a/cpp/src/traversal/bellman_ford_sg.cu b/cpp/src/traversal/bellman_ford_sg.cu index 6a438913fd8..b57c58746c3 100644 --- a/cpp/src/traversal/bellman_ford_sg.cu +++ b/cpp/src/traversal/bellman_ford_sg.cu @@ -17,52 +17,46 @@ namespace cugraph { -template void bellman_ford( - raft::handle_t const& handle, - graph_view_t const& graph_view, - std::optional> edge_weight_view, - int32_t source, - int32_t* predecessors, - float* distances); - -template void bellman_ford( - raft::handle_t const& handle, - graph_view_t const& graph_view, - std::optional> edge_weight_view, - int32_t source, - int32_t* predecessors, - double* distances); - -template void bellman_ford( - raft::handle_t const& handle, - graph_view_t const& graph_view, - std::optional> edge_weight_view, - int32_t source, - int32_t* predecessors, - float* distances); - -template void bellman_ford( - raft::handle_t const& handle, - graph_view_t const& graph_view, - std::optional> edge_weight_view, - int64_t source, - int64_t* predecessors, - float* distances); - -template void bellman_ford( - raft::handle_t const& handle, - graph_view_t const& graph_view, - std::optional> edge_weight_view, - int32_t source, - int32_t* predecessors, - double* distances); - -template void bellman_ford( - raft::handle_t const& handle, - graph_view_t const& graph_view, - std::optional> edge_weight_view, - int64_t source, - int64_t* predecessors, - double* distances); +template bool bellman_ford(raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view, + int32_t source, + int32_t* predecessors, + float* distances); + +template bool bellman_ford(raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view, + int32_t source, + int32_t* predecessors, + double* distances); + +template bool bellman_ford(raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view, + int32_t source, + int32_t* predecessors, + float* distances); + +template bool bellman_ford(raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view, + int64_t source, + int64_t* predecessors, + float* distances); + +template bool bellman_ford(raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view, + int32_t source, + int32_t* predecessors, + double* distances); + +template bool bellman_ford(raft::handle_t const& handle, + graph_view_t const& graph_view, + edge_property_view_t edge_weight_view, + int64_t source, + int64_t* predecessors, + double* distances); } // namespace cugraph