reorg secp
Ahdhn committed Dec 25, 2024
1 parent 3dd2f42 commit e05473c
Showing 4 changed files with 186 additions and 240 deletions.
1 change: 1 addition & 0 deletions apps/SECPriority/CMakeLists.txt
@@ -4,6 +4,7 @@ set(SOURCE_LIST
secp.cu
secp_rxmesh.cuh
secp_kernels.cuh
secp_pair.h
)

target_sources(SECPriority
75 changes: 34 additions & 41 deletions apps/SECPriority/secp_kernels.cuh
@@ -2,14 +2,12 @@
#include "../Remesh/link_condition.cuh"
#include "rxmesh/cavity_manager.cuh"

#include <cooperative_groups.h>
#include <cuda_runtime.h>
#include "secp_pair.h"

template <typename T, uint32_t blockThreads>
__global__ static void secp(rxmesh::Context context,
rxmesh::VertexAttribute<T> coords,
const int reduce_threshold,
rxmesh::EdgeAttribute<bool> e_pop_attr)
rxmesh::EdgeAttribute<bool> to_collapse)
{
using namespace rxmesh;
auto block = cooperative_groups::this_thread_block();
@@ -40,12 +38,12 @@ __global__ static void secp(rxmesh::Context context,
ev_query.prologue<Op::EV>(block, shrd_alloc);
block.sync();

// 1a) mark edges we want to collapse given e_pop_attr
// 1a) mark edges we want to collapse given to_collapse
for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) {
assert(eh.local_id() < cavity.patch_info().num_edges[0]);

// edge_mask.set(eh.local_id(), e_pop_attr(eh));
if (true == e_pop_attr(eh)) {
// edge_mask.set(eh.local_id(), to_collapse(eh));
if (to_collapse(eh)) {
edge_mask.set(eh.local_id(), true);
}
});
@@ -73,7 +71,7 @@ __global__ static void secp(rxmesh::Context context,
ev_query.epilogue(block, shrd_alloc);

// create the cavity
if (cavity.prologue(block, shrd_alloc, coords)) {
if (cavity.prologue(block, shrd_alloc, coords, to_collapse)) {
edge_mask.reset(block);
block.sync();

@@ -145,7 +143,7 @@ template <typename T, uint32_t blockThreads>
__global__ static void compute_edge_priorities(
rxmesh::Context context,
const rxmesh::VertexAttribute<T> coords,
PQView_t pq_view,
PQViewT pq_view,
size_t pq_num_bytes)
{
using namespace rxmesh;
@@ -154,53 +152,48 @@ __global__ static void compute_edge_priorities(
ShmemAllocator shrd_alloc;

Query<blockThreads> query(context);
auto intermediatePairs =
shrd_alloc.alloc<PriorityPair_t>(query.get_patch_info().num_edges[0]);

PriorityPairT* s_pairs =
shrd_alloc.alloc<PriorityPairT>(query.get_patch_info().num_edges[0]);
__shared__ int pair_counter;
pair_counter = 0;

auto edge_len = [&](const EdgeHandle& eh, const VertexIterator& iter) {
const VertexHandle v0 = iter[0];
const VertexHandle v1 = iter[1];

const Vec3<T> p0(coords(v0, 0), coords(v0, 1), coords(v0, 2));
const Vec3<T> p1(coords(v1, 0), coords(v1, 1), coords(v1, 2));
const vec3<T> p0 = coords.to_glm<3>(v0);
const vec3<T> p1 = coords.to_glm<3>(v1);

T len2 = glm::distance2(p0, p1);
const T len2 = glm::distance2(p0, p1);

auto p_e = rxmesh::detail::unpack(eh.unique_id());
// printf("p_id:%u\te_id:%hu\n", p_e.first, p_e.second);
// printf("e_id:%llu\t, len:%f\n", eh.unique_id(), len2);
assert(eh.patch_id() < (1 << 16));

// repack the EdgeHandle into a smaller 32-bit id for
// use with the priority queue. Need to check elsewhere
// that there are fewer than 2^16 patches.
auto id32 = unique_id32(p_e.second, (uint16_t)p_e.first);
// auto p_e_32 = unpack32(id32);
// printf("32bit p_id:%hu\te_id:%hu\n", p_e_32.first, p_e_32.second);
const uint32_t id32 =
unique_id32(eh.local_id(), (uint16_t)eh.patch_id());

const PriorityPairT p{len2, id32};

PriorityPair_t p{len2, id32};
// PriorityPair_t p{len2, eh};
int val_counter = atomicAdd(&pair_counter, 1);

auto val_counter = atomicAdd(&pair_counter, 1);
intermediatePairs[val_counter] = p;
s_pairs[val_counter] = p;
};

auto block = cooperative_groups::this_thread_block();
query.dispatch<Op::EV>(block, shrd_alloc, edge_len);
block.sync();

char* pq_shrd_mem = shrd_alloc.alloc(pq_num_bytes);
pq_view.push(block,
intermediatePairs,
intermediatePairs + pair_counter,
pq_shrd_mem);
pq_view.push(block, s_pairs, s_pairs + pair_counter, pq_shrd_mem);
}

template <uint32_t blockThreads>
__global__ static void pop_and_mark_edges_to_collapse(
PQView_t pq_view,
rxmesh::EdgeAttribute<bool> marked_edges,
PQViewT pq_view,
rxmesh::EdgeAttribute<bool> to_collapse,
uint32_t pop_num_edges)
{
// set up a shared memory array to store the popped pairs
@@ -210,24 +203,24 @@ __global__ static void pop_and_mark_edges_to_collapse(
using namespace rxmesh;
ShmemAllocator shrd_alloc;

auto intermediatePairs = shrd_alloc.alloc<PriorityPair_t>(blockThreads);
char* pq_shrd_mem = shrd_alloc.alloc(pq_view.get_shmem_size(blockThreads));
PriorityPairT* s_pairs = shrd_alloc.alloc<PriorityPairT>(blockThreads);

char* pq_shrd_mem = shrd_alloc.alloc(pq_view.get_shmem_size(blockThreads));

cg::thread_block g = cg::this_thread_block();
pq_view.pop(
g, intermediatePairs, intermediatePairs + blockThreads, pq_shrd_mem);

int tid = blockIdx.x * blockDim.x + threadIdx.x;
int local_tid = threadIdx.x;
pq_view.pop(g, s_pairs, s_pairs + blockThreads, pq_shrd_mem);

int tid = blockIdx.x * blockDim.x + threadIdx.x;

// Make sure the index is within bounds
if (tid < pop_num_edges) {
// printf("tid: %d\n", tid);
// unpack the uid to get the patch and edge ids
auto p_e = unpack32(intermediatePairs[local_tid].second);
// printf("32bit p_id:%hu\te_id:%hu\n", p_e.first, p_e.second);
rxmesh::EdgeHandle eh(p_e.first, rxmesh::LocalEdgeT(p_e.second));
auto [patch_id, local_id] = unpack32(s_pairs[threadIdx.x].second);

EdgeHandle eh(patch_id, LocalEdgeT(local_id));

// use the eh to index into the passed-in edge attribute
marked_edges(eh) = true;
to_collapse(eh) = true;
}
}
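
Taken together, the three kernels form one pass of shortest-edge collapse: compute_edge_priorities pushes every edge's squared length, keyed by a packed 32-bit edge id, into a global cuco priority queue; pop_and_mark_edges_to_collapse pops the shortest pop_num_edges entries and flags them in the to_collapse attribute; and secp collapses the flagged edges through the cavity machinery. Below is a minimal host-side sketch of that ordering. It is illustrative only: secp_pass, the launch-box calls, and the shared-memory arithmetic are assumptions made for this sketch, not the app's actual driver (which lives in secp_rxmesh.cuh).

#include "rxmesh/rxmesh_dynamic.h"
#include "secp_pair.h"

template <uint32_t blockThreads>
void secp_pass(rxmesh::RXMeshDynamic&          rx,
               rxmesh::VertexAttribute<float>& coords,
               rxmesh::EdgeAttribute<bool>&    to_collapse,
               const uint32_t                  num_to_pop,
               const int                       reduce_threshold)
{
    using namespace rxmesh;

    // global queue sized to hold one entry per edge (assumed capacity bound)
    PriorityQueueT pq(rx.get_num_edges());
    PQViewT        pq_view = pq.get_mutable_device_view();

    // shared-memory bytes one block needs for a queue push/pop
    // (assumption: the host queue exposes the same helper as the view)
    const size_t pq_bytes = pq.get_shmem_size(blockThreads);

    // 1) every patch pushes a (squared length, packed 32-bit id) pair per edge
    LaunchBox<blockThreads> lb;
    rx.prepare_launch_box(
        {Op::EV}, lb, (void*)compute_edge_priorities<float, blockThreads>);
    compute_edge_priorities<float, blockThreads>
        <<<lb.blocks, blockThreads, lb.smem_bytes_dyn + pq_bytes>>>(
            rx.get_context(), coords, pq_view, pq_bytes);

    // 2) pop the num_to_pop shortest edges and mark them for collapse;
    //    smem covers the s_pairs scratch array plus the queue's scratch space
    const uint32_t pop_blocks = (num_to_pop + blockThreads - 1) / blockThreads;
    const size_t   pop_smem = blockThreads * sizeof(PriorityPairT) + pq_bytes;
    pop_and_mark_edges_to_collapse<blockThreads>
        <<<pop_blocks, blockThreads, pop_smem>>>(
            pq_view, to_collapse, num_to_pop);

    // 3) collapse the marked edges through the cavity machinery
    rx.prepare_launch_box({Op::EV}, lb, (void*)secp<float, blockThreads>);
    secp<float, blockThreads>
        <<<lb.blocks, blockThreads, lb.smem_bytes_dyn>>>(
            rx.get_context(), coords, reduce_threshold, to_collapse);
}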
60 changes: 60 additions & 0 deletions apps/SECPriority/secp_pair.h
@@ -0,0 +1,60 @@
#pragma once

#include <cstdint>  // uint16_t / uint32_t
#include <utility>  // std::pair / std::make_pair

#include <cuco/pair.cuh>
#include <cuco/priority_queue.cuh>


/**
 * @brief Return a unique index for a local mesh element, composed of the
 * patch id and the local index
 *
 * @param local_id the local within-patch mesh element id
 * @param patch_id the patch owning the mesh element
 * @return a 32-bit id with the patch id in the high 16 bits and the
 * local id in the low 16 bits
 */
constexpr __device__ __host__ __forceinline__ uint32_t
unique_id32(const uint16_t local_id, const uint16_t patch_id)
{
uint32_t ret = patch_id;
ret = (ret << 16);
ret |= local_id;
return ret;
}


/**
 * @brief Unpack a 32-bit uint into its high and low 16 bits.
 * This is used to convert the unique id back to its local id
 * (low 16 bits) and patch id (high 16 bits)
 * @param uid unique id
 * @return a std::pair storing the patch id and local id
 */
constexpr __device__ __host__ __forceinline__ std::pair<uint16_t, uint16_t>
unpack32(uint32_t uid)
{
uint16_t local_id = uid & ((1 << 16) - 1);
uint16_t patch_id = uid >> 16;
return std::make_pair(patch_id, local_id);
}
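
// Example (hypothetical ids): unique_id32(7, 3) packs local id 7 in patch 3
// as 0x00030007, and unpack32(0x00030007u) returns {3, 7}, i.e. {patch id,
// local id}. The two functions are exact inverses whenever the patch id
// fits in 16 bits, which the assert in compute_edge_priorities above
// (eh.patch_id() < (1 << 16)) guards.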


/**
 * @brief less-than operator that orders pairs by their first element
 * @tparam T a pair-like type with a 'first' member (e.g., cuco::pair)
 */
template <typename T>
struct pair_less
{
__host__ __device__ __forceinline__ bool operator()(const T& a,
const T& b) const
{
return a.first < b.first;
}
};


// Priority queue setup. Use 'pair_less' to prioritize smaller values.
using PriorityPairT = cuco::pair<float, uint32_t>;
using PriorityCompare = pair_less<PriorityPairT>;
using PriorityQueueT = cuco::priority_queue<PriorityPairT, PriorityCompare>;
using PQViewT = PriorityQueueT::device_mutable_view;
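
Because PriorityCompare orders pairs by their first element, the queue pops the smallest squared edge length, i.e. the shortest edge, first. Below is a standalone sketch of that behavior; it assumes cuco's experimental priority_queue takes its capacity at construction and exposes host-side push/pop over device iterators, mirroring the device-view calls the kernels above rely on.

#include <thrust/device_vector.h>
#include <vector>
#include "secp_pair.h"

int main()
{
    PriorityQueueT pq(8);  // assumed: capacity passed at construction

    // three fake edges with squared lengths 2.0, 0.5, 1.0
    std::vector<PriorityPairT> h_pairs = {{2.0f, unique_id32(0, 0)},
                                          {0.5f, unique_id32(1, 0)},
                                          {1.0f, unique_id32(2, 0)}};
    thrust::device_vector<PriorityPairT> d_pairs(h_pairs);
    pq.push(d_pairs.begin(), d_pairs.end());

    // popping one element yields the shortest edge: {0.5f, unique_id32(1, 0)}
    thrust::device_vector<PriorityPairT> d_top(1);
    pq.pop(d_top.begin(), d_top.end());

    const PriorityPairT top = d_top[0];
    return top.second == unique_id32(1, 0) ? 0 : 1;
}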
