From a82a46e9735fa4e5b5d6b14ec82a40c8c2f8c47d Mon Sep 17 00:00:00 2001
From: ahmed
Date: Mon, 6 Jan 2025 20:46:10 -0500
Subject: [PATCH] Fix shared memory allocation

---
 apps/Remesh/collapse.cuh                |  2 +-
 apps/Remesh/flip.cuh                    |  2 +-
 apps/Remesh/remesh_rxmesh.cuh           |  5 +-
 apps/Remesh/smoothing.cuh               |  2 +-
 apps/SECHistogram/sec_kernels.cuh       |  2 +-
 apps/SECPriority/secp_kernels.cuh       |  2 +-
 include/rxmesh/cavity_manager2.cuh      |  1 +
 include/rxmesh/cavity_manager_impl2.cuh | 92 ++++++++++++++++++-------
 include/rxmesh/context.h                | 27 +++-----
 include/rxmesh/rxmesh.cpp               |  3 -
 include/rxmesh/rxmesh_dynamic.h         | 53 +++++++++-----
 11 files changed, 119 insertions(+), 72 deletions(-)

diff --git a/apps/Remesh/collapse.cuh b/apps/Remesh/collapse.cuh
index bd00ec24..0619a20b 100644
--- a/apps/Remesh/collapse.cuh
+++ b/apps/Remesh/collapse.cuh
@@ -31,7 +31,7 @@ __global__ static void __launch_bounds__(blockThreads)
 
     // a bitmask that indicates which edge we want to flip
     // we also use it to mark updated edges (for edge_status)
-    Bitmask edge_mask(cavity.patch_info().edges_capacity[0], shrd_alloc);
+    Bitmask edge_mask(cavity.patch_info().edges_capacity, shrd_alloc);
     edge_mask.reset(block);
 
     uint32_t shmem_before = shrd_alloc.get_allocated_size_bytes();
diff --git a/apps/Remesh/flip.cuh b/apps/Remesh/flip.cuh
index 3b4de3a8..e08f44a0 100644
--- a/apps/Remesh/flip.cuh
+++ b/apps/Remesh/flip.cuh
@@ -54,7 +54,7 @@ __global__ static void __launch_bounds__(blockThreads)
 
     // a bitmask that indicates which edge we want to flip
     // we also used it to mark the new edges
-    Bitmask edge_mask(cavity.patch_info().edges_capacity[0], shrd_alloc);
+    Bitmask edge_mask(cavity.patch_info().edges_capacity, shrd_alloc);
     edge_mask.reset(block);
 
     uint32_t shmem_before = shrd_alloc.get_allocated_size_bytes();
diff --git a/apps/Remesh/remesh_rxmesh.cuh b/apps/Remesh/remesh_rxmesh.cuh
index 2a16281a..77b16590 100644
--- a/apps/Remesh/remesh_rxmesh.cuh
+++ b/apps/Remesh/remesh_rxmesh.cuh
@@ -389,10 +389,9 @@ inline void remesh_rxmesh(rxmesh::RXMeshDynamic& rx)
                 timers.elapsed_millis("SplitTotal"));
     RXMESH_INFO("Collapse Total Time {} (ms)",
                 timers.elapsed_millis("CollapseTotal"));
-    RXMESH_INFO("Flip Total Time {} (ms)",
-                timers.elapsed_millis("FlipTotal"));
+    RXMESH_INFO("Flip Total Time {} (ms)", timers.elapsed_millis("FlipTotal"));
     RXMESH_INFO("Smooth Total Time {} (ms)",
-                timers.elapsed_millis("SmoothTotal"));
+                timers.elapsed_millis("SmoothTotal"));
 
     report.add_member("split_time_ms", timers.elapsed_millis("SplitTotal"));
     report.add_member("collapse_time_ms",
diff --git a/apps/Remesh/smoothing.cuh b/apps/Remesh/smoothing.cuh
index 0f1426ca..af548bcd 100644
--- a/apps/Remesh/smoothing.cuh
+++ b/apps/Remesh/smoothing.cuh
@@ -23,7 +23,7 @@ __global__ static void __launch_bounds__(blockThreads)
             return;
         }
 
-        if (v_boundary(v_id)) {
+        if (v_boundary(v_id) || iter.size() == 0) {
             new_coords(v_id, 0) = coords(v_id, 0);
             new_coords(v_id, 1) = coords(v_id, 1);
             new_coords(v_id, 2) = coords(v_id, 2);
diff --git a/apps/SECHistogram/sec_kernels.cuh b/apps/SECHistogram/sec_kernels.cuh
index 571acaa7..4507c2cd 100644
--- a/apps/SECHistogram/sec_kernels.cuh
+++ b/apps/SECHistogram/sec_kernels.cuh
@@ -24,7 +24,7 @@ __global__ static void sec(rxmesh::Context context,
     // we first use this mask to set the edge we want to collapse (and then
     // filter them). Then after cavity.prologue, we reuse this bitmask to mark
     // the newly added edges
-    Bitmask edge_mask(cavity.patch_info().edges_capacity[0], shrd_alloc);
+    Bitmask edge_mask(cavity.patch_info().edges_capacity, shrd_alloc);
     edge_mask.reset(block);
 
     // we use this bitmask to mark the other end of to-be-collapse edge during
diff --git a/apps/SECPriority/secp_kernels.cuh b/apps/SECPriority/secp_kernels.cuh
index b7d61ebc..059b5aec 100644
--- a/apps/SECPriority/secp_kernels.cuh
+++ b/apps/SECPriority/secp_kernels.cuh
@@ -24,7 +24,7 @@ __global__ static void secp(rxmesh::Context context,
     // we first use this mask to set the edge we want to collapse (and then
     // filter them). Then after cavity.prologue, we reuse this bitmask to mark
     // the newly added edges
-    Bitmask edge_mask(cavity.patch_info().edges_capacity[0], shrd_alloc);
+    Bitmask edge_mask(cavity.patch_info().edges_capacity, shrd_alloc);
     edge_mask.reset(block);
 
     // we use this bitmask to mark the other end of to-be-collapse edge during
diff --git a/include/rxmesh/cavity_manager2.cuh b/include/rxmesh/cavity_manager2.cuh
index cf202f94..e4e2b34e 100644
--- a/include/rxmesh/cavity_manager2.cuh
+++ b/include/rxmesh/cavity_manager2.cuh
@@ -1014,6 +1014,7 @@ struct CavityManager2
 
     // store the boundary edges of all cavities in compact format (similar to
    // CSR for sparse matrices using m_s_cavity_size_prefix but no value ptr)
+    // overlaps with m_s_boudary_edges_cavity_id
     uint16_t* m_s_cavity_boundary_edges;
 
     // patch stash stored in shared memory
diff --git a/include/rxmesh/cavity_manager_impl2.cuh b/include/rxmesh/cavity_manager_impl2.cuh
index 0681565e..aad7ae03 100644
--- a/include/rxmesh/cavity_manager_impl2.cuh
+++ b/include/rxmesh/cavity_manager_impl2.cuh
@@ -129,7 +129,7 @@ __device__ __forceinline__ CavityManager2<blockThreads, cop>::CavityManager2(
             std::max(face_cap_bytes, m_patch_info.lp_f.num_bytes())));
     assert(m_s_cavity_id_f);
 
-    const uint16_t assumed_num_cavities = m_context.m_max_num_faces[0] / 2;
+    const uint16_t assumed_num_cavities = DIVIDE_UP(face_cap, 2);
     m_s_cavity_creator = shrd_alloc.alloc<uint16_t>(assumed_num_cavities);
     assert(m_s_cavity_creator);
     fill_n<blockThreads>(
@@ -155,13 +155,6 @@ CavityManager2<blockThreads, cop>::alloc_shared_memory(
     const uint16_t edge_cap = m_patch_info.edges_capacity;
     const uint16_t face_cap = m_patch_info.faces_capacity;
 
-    const uint16_t max_vertex_cap =
-        static_cast<uint16_t>(m_context.m_max_num_vertices[0]);
-    const uint16_t max_edge_cap =
-        static_cast<uint16_t>(m_context.m_max_num_edges[0]);
-    const uint16_t max_face_cap =
-        static_cast<uint16_t>(m_context.m_max_num_faces[0]);
-
     __shared__ LPPair s_inv_st_v[LPHashTable::stash_size];
     __shared__ LPPair s_inv_st_e[LPHashTable::stash_size];
     __shared__ LPPair s_inv_st_f[LPHashTable::stash_size];
@@ -177,13 +170,9 @@ CavityManager2<blockThreads, cop>::alloc_shared_memory(
         reinterpret_cast<LPPair*>(m_s_cavity_id_f), s_inv_st_f);
 
-    assert(max_vertex_cap >= m_s_num_vertices[0]);
-    assert(max_edge_cap >= m_s_num_edges[0]);
-    assert(max_face_cap >= m_s_num_faces[0]);
-
-    assert(vert_cap >= m_s_num_vertices[0]);
-    assert(edge_cap >= m_s_num_edges[0]);
-    assert(face_cap >= m_s_num_faces[0]);
+    assert(m_context.m_max_num_vertices[0] >= m_s_num_vertices[0]);
+    assert(m_context.m_max_num_edges[0] >= m_s_num_edges[0]);
+    assert(m_context.m_max_num_faces[0] >= m_s_num_faces[0]);
 
     // load EV and FE
     m_s_ev = shrd_alloc.alloc<uint16_t>(2 * edge_cap);
@@ -283,9 +272,9 @@ CavityManager2<blockThreads, cop>::alloc_shared_memory(
     assert(m_s_owned_cavity_bdry_v.m_bitmask);
     m_s_connect_cavity_bdry_v = Bitmask(vert_cap, shrd_alloc);
     assert(m_s_connect_cavity_bdry_v.m_bitmask);
-    m_s_src_mask_v = Bitmask(max_vertex_cap, shrd_alloc);
+    m_s_src_mask_v = Bitmask(vert_cap, shrd_alloc);
     assert(m_s_src_mask_v.m_bitmask);
-    m_s_src_connect_mask_v = Bitmask(max_vertex_cap, shrd_alloc);
+    m_s_src_connect_mask_v = Bitmask(vert_cap, shrd_alloc);
     assert(m_s_src_connect_mask_v.m_bitmask);
 
 
@@ -299,9 +288,9 @@ CavityManager2<blockThreads, cop>::alloc_shared_memory(
                  m_s_in_cavity_e,
                  m_patch_info.owned_mask_e,
                  m_patch_info.active_mask_e);
-    m_s_src_mask_e = Bitmask(std::max(max_edge_cap, edge_cap), shrd_alloc);
+    m_s_src_mask_e = Bitmask(edge_cap, shrd_alloc);
     assert(m_s_src_mask_e.m_bitmask);
-    m_s_src_connect_mask_e = Bitmask(max_edge_cap, shrd_alloc);
+    m_s_src_connect_mask_e = Bitmask(edge_cap, shrd_alloc);
     assert(m_s_src_connect_mask_e.m_bitmask);
 
     // faces masks
@@ -316,7 +305,7 @@ CavityManager2<blockThreads, cop>::alloc_shared_memory(
                  m_patch_info.active_mask_f);
 
 
-    assert(2 * get_num_cavities() <= max_face_cap);
+    assert(2 * get_num_cavities() <= face_cap);
 
 #ifndef NDEBUG
     // EV
@@ -377,10 +366,9 @@ CavityManager2<blockThreads, cop>::alloc_shared_memory(
     m_s_locked_patches_mask.reset(block);
 
     // cavity boundary edges
-    m_s_cavity_boundary_edges = shrd_alloc.alloc<uint16_t>(m_s_num_edges[0]);
+    m_s_cavity_boundary_edges = shrd_alloc.alloc<uint16_t>(edge_cap);
     assert(m_s_cavity_boundary_edges);
 
-
     // lp stash
     __shared__ LPPair st_v[LPHashTable::stash_size];
     m_s_table_stash_v = st_v;
@@ -415,7 +403,7 @@ CavityManager2<blockThreads, cop>::alloc_shared_memory(
     // this assertion is because when we allocated dynamic shared memory
     // during kernel launch we assumed the number of cavities is at most
     // half the number of faces in the patch
-    assert(m_s_num_cavities[0] <= m_s_num_faces[0] / 2);
+    assert(m_s_num_cavities[0] + 1 <= DIVIDE_UP(face_cap, 2));
     m_s_cavity_size_prefix =
         shrd_alloc.alloc<uint16_t>(m_s_num_cavities[0] + 1);
     assert(m_s_cavity_size_prefix);
@@ -448,6 +436,7 @@ CavityManager2<blockThreads, cop>::verify_reading_from_global_memory(
         assert(m_s_active_mask_v(v) ==
                !m_patch_info.is_deleted(LocalVertexT(v)));
+        assert(v < m_s_owned_mask_v.size());
         assert(m_s_owned_mask_v(v) == m_patch_info.is_owned(LocalVertexT(v)));
     }
 
@@ -455,6 +444,7 @@ CavityManager2<blockThreads, cop>::verify_reading_from_global_memory(
     for (int e = threadIdx.x; e < int(m_s_num_edges[0]); e += blockThreads) {
         assert(e < m_s_active_mask_e.size());
         assert(m_s_active_mask_e(e) == !m_patch_info.is_deleted(LocalEdgeT(e)));
+        assert(e < m_s_owned_mask_e.size());
         assert(m_s_owned_mask_e(e) == m_patch_info.is_owned(LocalEdgeT(e)));
     }
 
@@ -462,6 +452,7 @@ CavityManager2<blockThreads, cop>::verify_reading_from_global_memory(
     for (int f = threadIdx.x; f < int(m_s_num_faces[0]); f += blockThreads) {
         assert(f < m_s_active_mask_f.size());
         assert(m_s_active_mask_f(f) == !m_patch_info.is_deleted(LocalFaceT(f)));
+        assert(f < m_s_owned_mask_f.size());
         assert(m_s_owned_mask_f(f) == m_patch_info.is_owned(LocalFaceT(f)));
     }
 
@@ -842,6 +833,8 @@ CavityManager2<blockThreads, cop>::construct_cavity_graph(
     // try to add an edge between c_a and c_b
     auto add_edge_gather = [&](const uint16_t c_a, const uint16_t c_b) {
         if (c_a != INVALID16 && c_b != INVALID16 && c_a != c_b) {
+            assert(c_a < m_s_active_cavity_bitmask.size());
+            assert(c_b < m_s_active_cavity_bitmask.size());
             if (m_s_active_cavity_bitmask(c_a) &&
                 m_s_active_cavity_bitmask(c_b)) {
                 add_edge_to_cavity_graph(c_a, c_b);
@@ -853,6 +846,7 @@ CavityManager2<blockThreads, cop>::construct_cavity_graph(
                            const uint16_t element_id,
                            const uint16_t cavity_id) {
         if (cavity_id != INVALID16) {
+            assert(cavity_id < m_s_active_cavity_bitmask.size());
             if (m_s_active_cavity_bitmask(cavity_id)) {
                 uint16_t prv_cavity =
                     atomicMin(&element_cavity_id[element_id], cavity_id);
@@ -869,6 +863,7 @@ CavityManager2<blockThreads, cop>::construct_cavity_graph(
 
     auto is_active_cavity = [&](const uint16_t cavity_id) -> bool {
         if (cavity_id != INVALID16) {
+            assert(cavity_id < m_s_active_cavity_bitmask.size());
             if (m_s_active_cavity_bitmask(cavity_id)) {
                 return true;
             } else {
@@ -1115,11 +1110,13 @@ CavityManager2<blockThreads, cop>::calc_cavity_maximal_independent_set(
     block.sync();
 
     for (int c = threadIdx.x; c < num_cavities; c += blockThreads) {
+        assert(c < m_s_active_cavity_bitmask.size());
         if (m_s_active_cavity_bitmask(c)) {
             const uint16_t creator = m_s_cavity_creator[c];
             assert(creator != INVALID16);
             if constexpr (cop == CavityOp::V || cop == CavityOp::VV ||
                           cop == CavityOp::VE || cop == CavityOp::VF) {
+                assert(creator < m_s_active_mask_v.size());
                 assert(m_s_active_mask_v(creator));
                 m_s_cavity_id_v[creator] = c;
                 assert(m_s_active_mask_v(creator));
@@ -1172,6 +1169,7 @@ CavityManager2<blockThreads, cop>::add_edge_to_cavity_graph(const uint16_t c0,
     auto clear = [&](const uint16_t c, const uint16_t index) {
         m_s_cavity_graph_mutex.lock(c);
 
+        assert(c < m_s_active_cavity_bitmask.size());
         m_s_active_cavity_bitmask.reset(c, true);
         assert(c * MAX_OVERLAP_CAVITIES + index <
                MAX_OVERLAP_CAVITIES * get_num_cavities());
@@ -1244,6 +1242,9 @@ CavityManager2<blockThreads, cop>::mark_vertices_through_edges()
             const uint16_t v0 = m_s_ev[2 * e + 0];
             const uint16_t v1 = m_s_ev[2 * e + 1];
 
+            assert(v0 < m_s_active_mask_v.size());
+            assert(v1 < m_s_active_mask_v.size());
+
             assert(m_s_active_mask_v(v0));
             assert(m_s_active_mask_v(v1));
 
@@ -1294,6 +1295,9 @@ CavityManager2<blockThreads, cop>::mark_edges_through_vertices()
             const uint16_t c0 = m_s_cavity_id_v[v0];
             const uint16_t c1 = m_s_cavity_id_v[v1];
 
+            assert(v0 < m_s_active_mask_v.size());
+            assert(v1 < m_s_active_mask_v.size());
+
             assert(m_s_active_mask_v(v0));
             assert(m_s_active_mask_v(v1));
 
@@ -1347,6 +1351,7 @@ CavityManager2<blockThreads, cop>::mark_element_scatter(
     const uint16_t cavity_id)
 {
     if (cavity_id != INVALID16) {
+        assert(cavity_id < m_s_active_cavity_bitmask.size());
         if (m_s_active_cavity_bitmask(cavity_id)) {
             uint16_t prv_cavity =
                 atomicMin(&element_cavity_id[element_id], cavity_id);
@@ -1377,6 +1382,7 @@ CavityManager2<blockThreads, cop>::mark_element_gather(
     const uint16_t cavity_id)
 {
     if (cavity_id != INVALID16) {
+        assert(cavity_id < m_s_active_cavity_bitmask.size());
         if (m_s_active_cavity_bitmask(cavity_id)) {
             const uint16_t prv_element_cavity_id =
                 element_cavity_id[element_id];
@@ -1533,6 +1539,8 @@ CavityManager2<blockThreads, cop>::deactivate_boundary_cavities(
                 assert(e < m_s_owned_mask_e.size());
                 assert(f < m_s_owned_mask_f.size());
 
+                assert(v0 < m_s_owned_mask_v.size());
+                assert(v1 < m_s_owned_mask_v.size());
                 if (!m_s_owned_mask_f(f) || !m_s_owned_mask_e(e) ||
                     !m_s_owned_mask_v(v0) || !m_s_owned_mask_v(v1)) {
                     assert(v0 < m_s_owned_cavity_bdry_v.size());
@@ -1852,6 +1860,9 @@ CavityManager2<blockThreads, cop>::sort_cavities_edge_loop()
                 uint32_t v0 = m_s_ev[2 * ee + 0];
                 uint32_t v1 = m_s_ev[2 * ee + 1];
 
+                assert(v0 < m_s_active_mask_v.size());
+                assert(v1 < m_s_active_mask_v.size());
+
                 assert(m_s_active_mask_v(v0));
                 assert(m_s_active_mask_v(v1));
                 if (v0 == end_vertex || v1 == end_vertex) {
@@ -1921,6 +1932,9 @@ CavityManager2<blockThreads, cop>::get_cavity_vertex(uint16_t c,
     const uint16_t v0 = m_s_ev[2 * edge];
     const uint16_t v1 = m_s_ev[2 * edge + 1];
 
+    assert(v0 < m_s_active_mask_v.size());
+    assert(v1 < m_s_active_mask_v.size());
+
     assert(m_s_active_mask_v(v0));
     assert(m_s_active_mask_v(v1));
     return VertexHandle(m_patch_info.patch_id, ((dir == 0) ? v0 : v1));
@@ -1945,6 +1959,7 @@ CavityManager2<blockThreads, cop>::add_vertex()
         return VertexHandle();
     }
     assert(v_id < m_patch_info.vertices_capacity);
+    assert(v_id < m_s_active_mask_v.size());
     assert(m_s_active_mask_v(v_id));
 
     assert(v_id < m_s_owned_mask_v.size());
@@ -1981,6 +1996,10 @@ CavityManager2<blockThreads, cop>::add_edge(const VertexHandle src,
     assert(e_id < m_patch_info.edges_capacity);
     assert(e_id < m_s_active_mask_e.size());
     assert(m_s_active_mask_e(e_id));
+
+    assert(src.local_id() < m_s_active_mask_v.size());
+    assert(dest.local_id() < m_s_active_mask_v.size());
+
     assert(m_s_active_mask_v(src.local_id()));
     assert(m_s_active_mask_v(dest.local_id()));
 
@@ -2120,6 +2139,7 @@ __device__ __forceinline__ void CavityManager2<blockThreads, cop>::get_vertices(
     assert(eh.patch_id() == m_patch_info.patch_id);
     assert(eh.local_id() < m_s_num_edges[0]);
     // assert(m_s_active_mask_e(eh.local_id()));
+    assert(eh.local_id() < m_s_owned_mask_e.size());
     assert(m_s_owned_mask_e(eh.local_id()));
 
     v0 = VertexHandle(m_patch_info.patch_id, m_s_ev[2 * eh.local_id() + 0]);
@@ -2128,6 +2148,8 @@ __device__ __forceinline__ void CavityManager2<blockThreads, cop>::get_vertices(
 
     // assert(m_s_active_mask_v(v0.local_id()));
     // assert(m_s_active_mask_v(v1.local_id()));
+    assert(v0.local_id() < m_s_owned_mask_v.size());
+    assert(v1.local_id() < m_s_owned_mask_v.size());
     assert(m_s_owned_mask_v(v0.local_id()));
     assert(m_s_owned_mask_v(v1.local_id()));
 }
@@ -2142,7 +2164,8 @@ __device__ __forceinline__ void CavityManager2<blockThreads, cop>::get_edges(
     assert(fh.patch_id() == m_patch_info.patch_id);
     assert(fh.local_id() < m_s_num_faces[0]);
     // assert(m_s_active_mask_e(fh.local_id()));
-    assert(m_s_owned_mask_e(fh.local_id()));
+    assert(fh.local_id() < m_s_owned_mask_f.size());
+    assert(m_s_owned_mask_f(fh.local_id()));
 
     e0 = EdgeHandle(m_patch_info.patch_id, m_s_fe[3 * fh.local_id() + 0]);
     e1 = EdgeHandle(m_patch_info.patch_id, m_s_fe[3 * fh.local_id() + 1]);
@@ -2152,6 +2175,10 @@ __device__ __forceinline__ void CavityManager2<blockThreads, cop>::get_edges(
     // assert(m_s_active_mask_e(e1.local_id()));
     // assert(m_s_active_mask_e(e2.local_id()));
 
+    assert(e0.local_id() < m_s_owned_mask_e.size());
+    assert(e1.local_id() < m_s_owned_mask_e.size());
+    assert(e2.local_id() < m_s_owned_mask_e.size());
+
     assert(m_s_owned_mask_e(e0.local_id()));
     assert(m_s_owned_mask_e(e1.local_id()));
     assert(m_s_owned_mask_e(e2.local_id()));
@@ -2470,6 +2497,7 @@ __device__ __forceinline__ void CavityManager2<blockThreads, cop>::pre_migrate(
     for_each_cavity(block, [&](uint16_t c, uint16_t size) {
         for (int i = 0; i < int(size); ++i) {
             uint16_t vertex = get_cavity_vertex(c, i).local_id();
+            assert(vertex < m_s_active_mask_v.size());
             assert(m_s_active_mask_v(vertex));
             assert(vertex < m_s_owned_mask_v.size());
             if (m_s_owned_mask_v(vertex)) {
@@ -2509,6 +2537,8 @@ CavityManager2<blockThreads, cop>::pre_ribbonize(
             assert(v0 < m_s_num_vertices[0]);
             assert(v1 < m_s_num_vertices[0]);
 
+            assert(v0 < m_s_active_mask_v.size());
+            assert(v1 < m_s_active_mask_v.size());
             assert(m_s_active_mask_v(v0) || m_s_in_cavity_v(v0));
             assert(m_s_active_mask_v(v1) || m_s_in_cavity_v(v1));
 
@@ -2550,6 +2580,7 @@ CavityManager2<blockThreads, cop>::set_ownership_change_bitmask(
     block.sync();
 
     for (int v = threadIdx.x; v < int(m_s_num_vertices[0]); v += blockThreads) {
+        assert(v < m_s_owned_mask_v.size());
         if (!m_s_owned_mask_v(v) && m_s_in_cavity_v(v)) {
             m_s_ownership_change_mask_v.set(v, true);
         }
@@ -2581,6 +2612,8 @@ CavityManager2<blockThreads, cop>::set_ownership_change_bitmask(
                 assert(v0 < m_s_in_cavity_v.size());
                 assert(v1 < m_s_in_cavity_v.size());
 
+                assert(v0 < m_s_active_mask_v.size());
+                assert(v1 < m_s_active_mask_v.size());
                 assert(m_s_active_mask_v(v0) || m_s_in_cavity_v(v0));
                 assert(m_s_active_mask_v(v1) || m_s_in_cavity_v(v1));
 
@@ -2601,6 +2634,7 @@ CavityManager2<blockThreads, cop>::set_ownership_change_bitmask(
 
             if (m_s_ownership_change_mask_f(f)) {
                 for (int e = 0; e < 3; ++e) {
+                    assert(edges[e] < m_s_owned_mask_e.size());
                     if (!m_s_owned_mask_e(edges[e]) &&
                         m_s_active_mask_e(edges[e])) {
                         m_s_ownership_change_mask_e.set(edges[e], true);
@@ -2622,6 +2656,7 @@ CavityManager2<blockThreads, cop>::set_ownership_change_bitmask(
             for (int i = 0; i < 2; ++i) {
                 const uint16_t v = m_s_ev[2 * e + i];
                 assert(v < m_s_in_cavity_v.size());
+                assert(v < m_s_active_mask_v.size());
                 assert(m_s_active_mask_v(v) || m_s_in_cavity_v(v));
                 assert(v < m_s_owned_cavity_bdry_v.size());
                 assert(v < m_s_not_owned_cavity_bdry_v.size());
@@ -2868,6 +2903,7 @@ CavityManager2<blockThreads, cop>::soft_migrate_from_patch(
                 // we don't check if this vertex is active in global memory
                 // since, it could have been activated/added only in shared
                 // memory (through a previous call to mirgate_from_patch)
+                assert(local_id < m_s_active_mask_v.size());
                 assert(m_s_active_mask_v(local_id));
                 assert(local_id < m_s_owned_mask_v.size());
                 assert(!m_s_owned_mask_v(local_id));
@@ -3003,6 +3039,7 @@ CavityManager2<blockThreads, cop>::migrate_from_patch(
                 // since, it could have been activated/added only in shared
                 // memory (through a previous call to mirgate_from_patch)
                 assert(local_id < m_s_in_cavity_v.size());
+                assert(local_id < m_s_active_mask_v.size());
                 assert(m_s_active_mask_v(local_id) ||
                        m_s_in_cavity_v(local_id));
 
@@ -3939,6 +3976,7 @@ CavityManager2<blockThreads, cop>::recover_edges()
 #ifndef NDEBUG
             for (int i = 0; i < 2; ++i) {
                 const uint16_t v = m_s_ev[2 * e + i];
+                assert(v < m_s_active_mask_v.size());
                 assert(m_s_recover_v(v) || m_s_active_mask_v(v));
             }
 #endif
@@ -3954,6 +3992,7 @@ CavityManager2<blockThreads, cop>::recover_vertices()
     for (int v = threadIdx.x; v < int(m_s_num_vertices[0]); v += blockThreads) {
         if (m_s_recover_v(v)) {
             if (!m_patch_info.is_deleted(LocalVertexT(v))) {
+                assert(v < m_s_active_mask_v.size());
                 m_s_active_mask_v.set(v, true);
             }
         }
@@ -3972,6 +4011,7 @@ CavityManager2<blockThreads, cop>::recover_vertices_through_edges()
                 assert(v < m_s_num_vertices[0]);
                 assert(!m_patch_info.is_deleted(LocalVertexT(v)));
                 m_s_recover_v.set(v, true);
+                assert(v < m_s_active_mask_v.size());
                 m_s_active_mask_v.set(v, true);
             }
         }
@@ -4091,9 +4131,11 @@ __device__ __forceinline__ void CavityManager2<blockThreads, cop>::epilogue(
     for (int v = threadIdx.x; v < int(m_s_active_mask_v.size());
          v += blockThreads) {
         if (m_s_in_cavity_v(v)) {
+            assert(v < m_s_active_mask_v.size());
             assert(!m_s_active_mask_v(v));
         }
         if (m_s_fill_in_v(v)) {
+            assert(v < m_s_active_mask_v.size());
             assert(m_s_active_mask_v(v));
         }
     }
@@ -4184,9 +4226,11 @@ __device__ __forceinline__ void CavityManager2<blockThreads, cop>::epilogue(
     for (int v = threadIdx.x; v < int(m_s_active_mask_v.size());
          v += blockThreads) {
         if (m_s_in_cavity_v(v)) {
+            assert(v < m_s_active_mask_v.size());
             m_s_active_mask_v.set(v, true);
         }
         if (m_s_fill_in_v(v)) {
+            assert(v < m_s_active_mask_v.size());
             m_s_active_mask_v.reset(v, true);
         }
     }
diff --git a/include/rxmesh/context.h b/include/rxmesh/context.h
index c3637e6c..e3453634 100644
--- a/include/rxmesh/context.h
+++ b/include/rxmesh/context.h
@@ -26,18 +26,15 @@ class Context
           m_num_faces(nullptr),
           m_num_vertices(nullptr),
           m_num_patches(nullptr),
-          m_max_num_vertices(nullptr),
-          m_max_num_edges(nullptr),
-          m_max_num_faces(nullptr),
+          m_max_num_vertices(nullptr),
+          m_max_num_edges(nullptr),
+          m_max_num_faces(nullptr),
           m_d_vertex_prefix(nullptr),
           m_d_edge_prefix(nullptr),
           m_d_face_prefix(nullptr),
           m_h_vertex_prefix(nullptr),
           m_h_edge_prefix(nullptr),
-          m_h_face_prefix(nullptr),
-          m_max_lp_capacity_v(0),
-          m_max_lp_capacity_e(0),
-          m_max_lp_capacity_f(0),
+          m_h_face_prefix(nullptr),
           m_patches_info(nullptr),
           m_capacity_factor(0.0f),
           m_max_num_patches(0)
@@ -189,7 +186,7 @@ class Context
      */
     template <typename HandleT>
     __device__ __host__ __inline__ uint32_t linear_id(HandleT input) const
-    {
+    {
         assert(input.is_valid());
         assert(input.patch_id() < m_num_patches[0]);
 
@@ -250,9 +247,6 @@ class Context
               uint32_t*      h_vertex_prefix,
               uint32_t*      h_edge_prefix,
               uint32_t*      h_face_prefix,
-              uint16_t       max_lp_capacity_v,
-              uint16_t       max_lp_capacity_e,
-              uint16_t       max_lp_capacity_f,
               PatchInfo*     d_patches,
               PatchScheduler scheduler)
     {
@@ -302,10 +296,6 @@ class Context
         m_d_edge_prefix   = d_edge_prefix;
         m_d_face_prefix   = d_face_prefix;
 
-        m_max_lp_capacity_v = max_lp_capacity_v;
-        m_max_lp_capacity_e = max_lp_capacity_e;
-        m_max_lp_capacity_f = max_lp_capacity_f;
-
         m_patches_info    = d_patches;
         m_patch_scheduler = scheduler;
 
@@ -322,10 +312,9 @@ class Context
     uint32_t *m_max_num_vertices, *m_max_num_edges, *m_max_num_faces;
     uint32_t *m_d_vertex_prefix, *m_d_edge_prefix, *m_d_face_prefix,
         *m_h_vertex_prefix, *m_h_edge_prefix, *m_h_face_prefix;
-    uint16_t m_max_lp_capacity_v, m_max_lp_capacity_e, m_max_lp_capacity_f;
-    PatchInfo* m_patches_info;
-    float m_capacity_factor;
-    uint32_t m_max_num_patches;
+    PatchInfo*     m_patches_info;
+    float          m_capacity_factor;
+    uint32_t       m_max_num_patches;
     PatchScheduler m_patch_scheduler;
 };
 }  // namespace rxmesh
\ No newline at end of file
diff --git a/include/rxmesh/rxmesh.cpp b/include/rxmesh/rxmesh.cpp
index 3b1815ee..7d1a0427 100644
--- a/include/rxmesh/rxmesh.cpp
+++ b/include/rxmesh/rxmesh.cpp
@@ -156,9 +156,6 @@ void RXMesh::init(const std::vector<std::vector<uint32_t>>& fv,
                    m_h_vertex_prefix,
                    m_h_edge_prefix,
                    m_h_face_prefix,
-                   max_lp_hashtable_capacity<LocalVertexT>(),
-                   max_lp_hashtable_capacity<LocalEdgeT>(),
-                   max_lp_hashtable_capacity<LocalFaceT>(),
                    m_d_patches_info,
                    sch);
     m_timers.stop("context.init");
diff --git a/include/rxmesh/rxmesh_dynamic.h b/include/rxmesh/rxmesh_dynamic.h
index e8e7714f..c67a6624 100644
--- a/include/rxmesh/rxmesh_dynamic.h
+++ b/include/rxmesh/rxmesh_dynamic.h
@@ -770,12 +770,14 @@ class RXMeshDynamic : public RXMeshStatic
 
         if (is_dyn) {
             // connectivity (FE and EV) shared memory
+            //$$ m_s_ev, m_s_fe
             size_t connectivity_shmem = 0;
             connectivity_shmem += 3 * face_cap * sizeof(uint16_t) +
                                   2 * edge_cap * sizeof(uint16_t) +
                                   2 * ShmemAllocator::default_alignment;
 
             // cavity ID (which overlapped with the inverted hashtable)
+            //$$ m_s_cavity_id_v, m_s_cavity_id_e, m_s_cavity_id_f
             size_t cavity_id_shmem = 0;
             cavity_id_shmem += std::max(
                 vertex_cap * sizeof(uint16_t),
@@ -792,17 +794,28 @@ class RXMeshDynamic : public RXMeshStatic
             // size)
             const uint16_t half_face_cap = DIVIDE_UP(face_cap, 2);
 
+            // stores (and computes) the size (the prefix sum) of cavity sizes
+            //$$ m_s_cavity_size_prefix
             size_t cavity_size_shmem = 0;
             cavity_size_shmem += half_face_cap * sizeof(int) +
                                  ShmemAllocator::default_alignment;
 
+            // cavity boundary edges used to store the cavities' (outer) edges
+            // in compressed/compact sparse format
+            size_t cavity_boundary_edges = 0;
+            cavity_boundary_edges +=
+                edge_cap * sizeof(uint16_t) + ShmemAllocator::default_alignment;
+
             // cavity src element
+            //$$ m_s_cavity_creator
             size_t cavity_creator_shmem = half_face_cap * sizeof(uint16_t) +
                                           ShmemAllocator::default_alignment;
 
             // cavity boundary edges (overlaps with cavity graph)
-            size_t cavity_bdr_shmem = 0;
-            cavity_bdr_shmem +=
+            //$$ m_s_boudary_edges_cavity_id | m_s_cavity_graph |
+            //   m_s_temp_inv_lp
+            size_t boudary_edges_cavity_id = 0;
+            boudary_edges_cavity_id +=
                 std::max(edge_cap,
                          uint16_t(MAX_OVERLAP_CAVITIES * half_face_cap)) *
                     sizeof(uint16_t) +
@@ -824,28 +837,32 @@ class RXMeshDynamic : public RXMeshStatic
             bitmasks_shmem += detail::mask_num_bytes(face_cap);
 
             // the local offset of faces used in construct_cavities_edge_loop
-            size_t face_offset_shmem = face_cap * sizeof(uint8_t);
+            //$$ m_s_face_local_offset
+            size_t face_offset_shmem =
+                face_cap * sizeof(uint8_t) + ShmemAllocator::default_alignment;
 
             // shared memory is the max of 1. static query shared memory + the
             // cavity ID shared memory (since we need to mark seed elements) 2.
             // dynamic rxmesh shared memory which includes cavity ID shared
             // memory and other things
-            RXMESH_TRACE(
-                "RXMeshDynamic::update_launch_box() connectivity_shmem= "
-                "{}, cavity_id_shmem= {}, cavity_bdr_shmem= {}, "
-                "cavity_size_shmem= {}, bitmasks_shmem= {}, "
-                "cavity_creator_shmem={}, static_shmem= {}",
-                connectivity_shmem,
-                cavity_id_shmem,
-                cavity_bdr_shmem,
-                cavity_size_shmem,
-                bitmasks_shmem,
-                cavity_creator_shmem,
-                static_shmem);
+
+            // RXMESH_TRACE(
+            //     "RXMeshDynamic::update_launch_box() connectivity_shmem= "
+            //     "{}, cavity_id_shmem= {}, boudary_edges_cavity_id= {}, "
+            //     "cavity_size_shmem= {}, bitmasks_shmem= {}, "
+            //     "cavity_creator_shmem={}, static_shmem= {}",
+            //     connectivity_shmem,
+            //     cavity_id_shmem,
+            //     boudary_edges_cavity_id,
+            //     cavity_size_shmem,
+            //     bitmasks_shmem,
+            //     cavity_creator_shmem,
+            //     static_shmem);
+
             launch_box.smem_bytes_dyn = std::max(
-                connectivity_shmem + cavity_id_shmem + cavity_bdr_shmem +
-                    cavity_size_shmem + bitmasks_shmem + cavity_creator_shmem +
-                    face_offset_shmem,
+                connectivity_shmem + cavity_id_shmem + boudary_edges_cavity_id +
+                    cavity_size_shmem + cavity_boundary_edges + bitmasks_shmem +
+                    cavity_creator_shmem + face_offset_shmem,
                 static_shmem + cavity_id_shmem + cavity_creator_shmem);
         } else {
            launch_box.smem_bytes_dyn = static_shmem;
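
For illustration, the pattern this patch standardizes on -- carving per-patch buffers out of dynamic shared memory using the patch's own capacities (edges_capacity, faces_capacity) rather than the global maxima that used to live in Context, with the cavity count bounded by DIVIDE_UP(face_cap, 2) -- can be sketched in standalone CUDA as follows. The kernel name, the capacity arrays, and the word-packed mask layout below are simplified assumptions for the sketch, not RXMesh's actual ShmemAllocator or Bitmask; only the DIVIDE_UP rounding and the one-cavity-per-two-faces bound are taken from the diff above.

// capacity_sizing_sketch.cu -- illustrative only
#include <cstdint>
#include <cstdio>
#include <cuda_runtime.h>

#define DIVIDE_UP(num, divisor) (((num) + (divisor)-1) / (divisor))

__global__ void capacity_sized_kernel(const uint16_t* patch_edge_capacity,
                                      const uint16_t* patch_face_capacity)
{
    // one block per "patch"; read this patch's capacities, not a global max
    const uint16_t edge_cap = patch_edge_capacity[blockIdx.x];
    const uint16_t face_cap = patch_face_capacity[blockIdx.x];

    // dynamic shared memory is sized on the host for the largest patch;
    // each block only carves out what its own patch needs
    extern __shared__ uint32_t shmem[];

    // edge bitmask: one bit per edge slot, rounded up to 32-bit words
    uint32_t* edge_mask  = shmem;
    const int mask_words = DIVIDE_UP(edge_cap, 32);

    // cavity-size scratch: at most one cavity per two faces
    uint16_t* cavity_prefix =
        reinterpret_cast<uint16_t*>(edge_mask + mask_words);
    const int max_cavities = DIVIDE_UP(face_cap, 2);

    for (int i = threadIdx.x; i < mask_words; i += blockDim.x) {
        edge_mask[i] = 0u;
    }
    for (int c = threadIdx.x; c < max_cavities; c += blockDim.x) {
        cavity_prefix[c] = 0;
    }
    __syncthreads();

    if (threadIdx.x == 0) {
        printf("patch %d: %d mask words, %d cavity slots\n",
               (int)blockIdx.x, mask_words, max_cavities);
    }
}

int main()
{
    // two hypothetical patches with different capacities
    const uint16_t h_edge_cap[2] = {640, 1024};
    const uint16_t h_face_cap[2] = {420, 680};

    uint16_t *d_edge_cap, *d_face_cap;
    cudaMalloc(&d_edge_cap, sizeof(h_edge_cap));
    cudaMalloc(&d_face_cap, sizeof(h_face_cap));
    cudaMemcpy(d_edge_cap, h_edge_cap, sizeof(h_edge_cap),
               cudaMemcpyHostToDevice);
    cudaMemcpy(d_face_cap, h_face_cap, sizeof(h_face_cap),
               cudaMemcpyHostToDevice);

    // the launch-time allocation must cover the largest patch, mirroring
    // how update_launch_box() computes smem_bytes_dyn from capacities
    const size_t smem_bytes = DIVIDE_UP(1024, 32) * sizeof(uint32_t) +
                              DIVIDE_UP(680, 2) * sizeof(uint16_t);

    capacity_sized_kernel<<<2, 128, smem_bytes>>>(d_edge_cap, d_face_cap);
    cudaDeviceSynchronize();

    cudaFree(d_edge_cap);
    cudaFree(d_face_cap);
    return 0;
}

Per-patch sizing only stays safe if every block's carve-out fits within the launch-time bound, which is exactly the invariant the strengthened assertion assert(m_s_num_cavities[0] + 1 <= DIVIDE_UP(face_cap, 2)) in alloc_shared_memory checks.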