From cb282339eb8e4f9bd0120ca76b890c17193c3e1e Mon Sep 17 00:00:00 2001
From: ahmed
Date: Wed, 11 Dec 2024 13:57:52 -0500
Subject: [PATCH] patch stash mapper in cavity

---
 include/rxmesh/cavity_manager.cuh      |  18 ++-
 include/rxmesh/cavity_manager_impl.cuh | 146 ++++++++++++++-----------
 include/rxmesh/patch_lock.h            |  12 +-
 3 files changed, 106 insertions(+), 70 deletions(-)

diff --git a/include/rxmesh/cavity_manager.cuh b/include/rxmesh/cavity_manager.cuh
index 70a4bc8b..35828eeb 100644
--- a/include/rxmesh/cavity_manager.cuh
+++ b/include/rxmesh/cavity_manager.cuh
@@ -779,7 +779,7 @@ struct CavityManager

     /**
      * @brief return the number of elements as stored in shared memory based on
-     * template paramter
+     * template parameter
      */
     template
     __device__ __inline__ uint32_t get_num_elements()
@@ -870,6 +870,15 @@ struct CavityManager
         }
     }

+    /**
+     * @brief build patch stash mapper that maps q's patch stash index to p's
+     * patch stash index
+     */
+    __device__ __inline__ void build_patch_stash_mapper(
+        cooperative_groups::thread_block& block,
+        const PatchInfo&                  q_patch_info);
+
+
     // indicate if this block can write its updates to global memory during
     // epilogue
     bool m_write_to_gmem;
@@ -879,7 +888,7 @@ struct CavityManager

     // the prefix sum of the cavities sizes. the size of the cavity is the
     // number of boundary edges in the cavity
-    // this also could have be uint16_t but we use itn since we do atomicAdd on
+    // this also could have been uint16_t but we use int since we do atomicAdd on
     // it
     int* m_s_cavity_size_prefix;

@@ -981,6 +990,11 @@ struct CavityManager
     // patch stash stored in shared memory
     PatchStash m_s_patch_stash;

+    // indexed by q's patch stash id and returns the corresponding p's patch
+    // stash id. if the patch corresponds to p itself, we store INVALID8 - 1.
+    // if the patch does not exist in p's patch stash, we store INVALID8
+    uint8_t* m_s_patch_stash_mapper;
+
     PatchInfo m_patch_info;

     Context m_context;
diff --git a/include/rxmesh/cavity_manager_impl.cuh b/include/rxmesh/cavity_manager_impl.cuh
index a91aa29d..92a37524 100644
--- a/include/rxmesh/cavity_manager_impl.cuh
+++ b/include/rxmesh/cavity_manager_impl.cuh
@@ -380,6 +380,9 @@ CavityManager::alloc_shared_memory(
         m_s_patch_stash.m_stash[i] = m_patch_info.patch_stash.m_stash[i];
     }

+    __shared__ uint8_t p_st2[PatchStash::stash_size];
+    m_s_patch_stash_mapper = p_st2;
+
     // cavity prefix sum
     // this assertion is because when we allocated dynamic shared memory
     // during kernel launch we assumed the number of cavities is at most
@@ -2703,15 +2706,6 @@ CavityManager::soft_migrate_from_patch(
             if (!inserted) {
                 m_s_should_slice[0] = true;
             }
-            // if (!inserted) {
-            //     printf("\n p= %u, load factor = %f, stash load factor =
-            //     %f",
-            //            patch_id(),
-            //            m_patch_info.lp_v.compute_load_factor(m_s_table_v),
-            //            m_patch_info.lp_v.compute_load_factor(
-            //            m_s_table_stash_v));
-            // }
-            // assert(inserted);
         }
         block.sync();
     }
@@ -3276,7 +3270,7 @@ __device__ __inline__ LPPair CavityManager::migrate_vertex(
             assert(owner_stash_id < m_s_patches_to_lock_mask.size());
             m_s_patches_to_lock_mask.set(owner_stash_id, true);
         } else if (o != q && o != m_patch_info.patch_id &&
-                   o_stash != INVALID4) {
+                   o_stash != INVALID8) {
             assert(o_stash != INVALID8);
             assert(o_stash < m_s_patches_to_lock_mask.size());
             m_s_patches_to_lock_mask.set(o_stash, true);
@@ -3418,7 +3412,7 @@ __device__ __inline__ LPPair CavityManager::migrate_edge(
             assert(owner_stash_id < m_s_patches_to_lock_mask.size());
             m_s_patches_to_lock_mask.set(owner_stash_id, true);
         } else if (o != q && o != m_patch_info.patch_id &&
-                   o_stash != INVALID4) {
+                   o_stash != INVALID8) {
             assert(o_stash != INVALID8);
             assert(o_stash < m_s_patches_to_lock_mask.size());
             m_s_patches_to_lock_mask.set(o_stash, true);
@@ -3520,7 +3514,7 @@ __device__ __inline__ LPPair CavityManager::migrate_face(
             assert(owner_stash_id < m_s_patches_to_lock_mask.size());
             m_s_patches_to_lock_mask.set(owner_stash_id, true);
         } else if (o != q && o != m_patch_info.patch_id &&
-                   o_stash != INVALID4) {
+                   o_stash != INVALID8) {
             assert(o_stash != INVALID8);
             assert(o_stash < m_s_patches_to_lock_mask.size());
             m_s_patches_to_lock_mask.set(o_stash, true);
@@ -3723,11 +3717,25 @@ CavityManager::populate_correspondence(
     }

+    // build patch stash mapper
+    build_patch_stash_mapper(block, q_patch_info);
+    block.sync();
+
+
     // for other not-owned elements in q, we store the local id in the owner
     // patch and patch stash id (in q patch stash). When we store the local id
     // in the owner patch, we set the high bit to one to mark these elements
     // differently for the next for loop, where we check if these elements
     // exists in p
+
+    // q's not-owned elements could be
+    // 1) elements that don't exist at all in p.
+    // 2) elements where the owner patch is p.
+    // 3) elements where the owner patch is k and these
+    // elements have a copy in p. This will be handled in the next for loop
+    //
+    // To handle 1) and 3), we have to convert q's patch stash id to p's patch
+    // stash id and this is what we do in this for loop as well
     int q_num_elements = q_patch_info.get_num_elements()[0];

     assert(q_num_elements <= s_correspondence_size);
@@ -3740,65 +3748,53 @@ CavityManager::populate_correspondence(
             assert(s_correspondence[b] == INVALID16);

             assert(lp.patch_stash_id() < PatchStash::stash_size);
+            assert(lp.patch_stash_id() != INVALID8);
+
             // set the high bit to one
             uint16_t s = lp.local_id_in_owner_patch();
-            s |= (1 << 15);
-            s_correspondence[b] = s;
-
-            // patch stash id in q's patch stash
-            s_correspondence_stash[b] = lp.patch_stash_id();
-            // TODO assert that this entry is actually owned by the other patch
-        }
-    }
-    block.sync();
-
-
-    // The for loop above could result into q's not-owned element that are
-    // 1) don't exists at all in p.
-    // 2) elements where the owner patch is p. This will
-    // handle in this for loop
-    // 3) elements where the owner patch is k and these
-    // elements have a copy in p. This will be handle in the next for loop
-    //
-    // To handle 1) and 3), we have to convert q's patch stash id to p's patch
-    // stash id and this is what we do in this for loop
+            // it might sound wise to assert that the k_patch (which is the
+            // patch pointed to by lp) is the owning patch of s. But, we are
+            // not even sure if we are going to use s at all. Also, later on,
+            // we check if k_patch is the owner (ensure_ownership) and if it
+            // is dirty in migrate(). However, in there, we do this check on
+            // the patches that we are sure that we are going to change their
+            // ownership.
+            //
+            // const uint32_t k_patch =
+            //     q_patch_info.patch_stash.get_patch(lp.patch_stash_id());
+            // assert(m_context.m_patches_info[k_patch].is_owned(LocalT(s)));

-    for (int b = threadIdx.x; b < q_num_elements; b += blockThreads) {
-        // if the highest bit is set
-        const uint16_t corres = s_correspondence[b];
-        if (corres != INVALID16 && (corres & (1 << 15))) {
-            const uint8_t q_stash_id = s_correspondence_stash[b];
-            const uint32_t k_patch =
-                q_patch_info.patch_stash.get_patch(q_stash_id);
-            assert(k_patch != INVALID32);
+            // p's patch stash id that corresponds to q's patch stash id
+            const uint8_t p_stash_id =
+                m_s_patch_stash_mapper[lp.patch_stash_id()];

             // Case 2
-            if (k_patch == patch_id()) {
-                // clear the high bit
-                s_correspondence[b] = corres & ~(1 << 15);
+            if (p_stash_id == INVALID8 - 1u) {
                 s_correspondence_stash[b] = INVALID8;
-                continue;
-            }
-
-            // Case 1, early exit, i.e., if the k patch is not in p's patch
-            // stash, then the element is for sure not in p at all
+            } else {

-            const uint8_t p_stash_id =
-                m_s_patch_stash.find_patch_index(k_patch);
+                // Case 1, early exit, i.e., if the owner patch is not in p's
+                // patch stash, then the element is for sure not in p at all

-            // Could be either INVALID8 or the actual patch stash ID in p's
-            // patch stash
-            s_correspondence_stash[b] = p_stash_id;
+                // Could be either INVALID8 or the actual patch stash ID in p's
+                // patch stash
+                s_correspondence_stash[b] = p_stash_id;

-            if (p_stash_id == INVALID8) {
-                // if this patch is not in p, then reset this correspondence
-                // entry
-                s_correspondence[b] = INVALID16;
+                if (p_stash_id == INVALID8) {
+                    // if this patch is not in p, then reset this correspondence
+                    // entry
+                    s = INVALID16;
+                } else {
+                    // otherwise, mark this element by setting its highest bit
+                    s |= (1 << 15);
+                }
             }
+
+            s_correspondence[b] = s;
         }
     }
     block.sync();
@@ -3807,14 +3803,15 @@ CavityManager::populate_correspondence(
     // Now, in s_correspondence, there are some elements that are stored where
     // we have the local index in the owner patch and the patch stash in p.
     // some of these elements have a corresponding copy in p and we want to
-    // replace thier entries with the local index in p. Of course these are
+    // replace their entries with the local index in p. Of course these are
     // not-owned elements in p. So, there are different way of doing this.
-    // 1) outer for loop on the correspondance, and inner loop on p's table and
+    // 1) outer for loop on the correspondence, and inner loop on p's table and
     // p's table stash
-    // 2) outer loop on p's not-owned elements and find the LPPair, then inner
-    // for loop in the correspondance
+    // 2) outer loop on p's not-owned elements and find the LPPair, then inner
+    // for loop in the correspondence
     // We go with 2) since we have "probably" have smaller number of
-    // correspondance to search through than number of not-owned elements
+    // correspondence to search through than number of not-owned elements
+
     const int num_elements = get_num_elements();
@@ -3845,7 +3842,7 @@ CavityManager::populate_correspondence(

     block.sync();

-    // finally reset the correspondance element where their high bit is set to
+    // finally reset the correspondence element where their high bit is set to
     // one since these elements are not-owned in q but they don't appear in p
     for (int b = threadIdx.x; b < q_num_elements; b += blockThreads) {
         // if the highest bit is set
@@ -3858,6 +3855,31 @@ CavityManager::populate_correspondence(
 }

+
+template
+__device__ __inline__ void
+CavityManager::build_patch_stash_mapper(
+    cooperative_groups::thread_block& block,
+    const PatchInfo&                  q_patch_info)
+{
+    // build patch stash mapper that maps q's patch stash index to p's patch
+    // stash index
+    for (int q_stash_id = threadIdx.x; q_stash_id < PatchStash::stash_size;
+         q_stash_id += blockThreads) {

+        const uint32_t k_patch = q_patch_info.patch_stash.get_patch(q_stash_id);
+
+        if (k_patch == patch_id()) {
+            m_s_patch_stash_mapper[q_stash_id] = INVALID8 - 1u;
+        } else if (k_patch != INVALID32) {
+            const uint8_t p_stash_id =
+                m_s_patch_stash.find_patch_index(k_patch);
+            m_s_patch_stash_mapper[q_stash_id] = p_stash_id;
+        } else {
+            m_s_patch_stash_mapper[q_stash_id] = INVALID8;
+        }
+    }
+}
+
 template
 template
 __device__ __inline__ bool
 CavityManager::ensure_ownership(
diff --git a/include/rxmesh/patch_lock.h b/include/rxmesh/patch_lock.h
index 9925e789..c5f6882a 100644
--- a/include/rxmesh/patch_lock.h
+++ b/include/rxmesh/patch_lock.h
@@ -10,12 +10,12 @@ namespace rxmesh {
  */
 struct PatchLock
 {
-    __device__ __host__ PatchLock() : lock(nullptr), spin(nullptr){};
-    __device__ __host__ PatchLock(const PatchLock& other) = default;
-    __device__ __host__ PatchLock(PatchLock&&)            = default;
+    __device__ __host__ PatchLock() : lock(nullptr), spin(nullptr) {};
+    __device__ __host__ PatchLock(const PatchLock& other)      = default;
+    __device__ __host__ PatchLock(PatchLock&&)                 = default;
     __device__ __host__ PatchLock& operator=(const PatchLock&) = default;
-    __device__ __host__ PatchLock& operator=(PatchLock&&) = default;
-    __device__ __host__ ~PatchLock() = default;
+    __device__ __host__ PatchLock& operator=(PatchLock&&)      = default;
+    __device__ __host__ ~PatchLock()                           = default;


     /**
@@ -58,7 +58,7 @@ struct PatchLock
     /**
      * @brief check if the patch is locked
      */
-    __device__ bool is_locked()
+    __device__ bool is_locked() const
     {
 #ifdef __CUDA_ARCH__
         return atomic_read(lock) == LOCKED;
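
For readers who want the mapper idea in isolation, the sketch below rebuilds the translation table outside of RXMesh. It uses plain arrays and hypothetical names (build_mapper, find_index, q_stash, p_stash, P_ID) instead of the real PatchStash/CavityManager API, and it assumes the all-ones sentinels for INVALID8/INVALID32. It illustrates the convention the patch introduces (INVALID8 - 1 means "the patch is p itself", INVALID8 means "empty slot or not a neighbor of p"); it is not the library's implementation and runs on the host as ordinary C++.

#include <cstdint>
#include <cstdio>

// Simplified, hypothetical stand-ins for RXMesh's patch stash (illustration only).
constexpr int      STASH_SIZE = 8;
constexpr uint8_t  INVALID8   = 0xFF;        // assumed sentinel
constexpr uint32_t INVALID32  = 0xFFFFFFFF;  // assumed sentinel

// Linear scan over p's stash, mirroring what find_patch_index is assumed to do.
uint8_t find_index(const uint32_t p_stash[STASH_SIZE], uint32_t patch)
{
    for (int i = 0; i < STASH_SIZE; ++i) {
        if (p_stash[i] == patch) {
            return static_cast<uint8_t>(i);
        }
    }
    return INVALID8;
}

// Build the q -> p stash mapper following the convention in the patch:
//   INVALID8 - 1 : this q-stash slot refers to p itself
//   INVALID8     : this q-stash slot is empty, or the patch is not in p's stash
//   otherwise    : index of that patch inside p's stash
void build_mapper(uint32_t       p_id,
                  const uint32_t q_stash[STASH_SIZE],
                  const uint32_t p_stash[STASH_SIZE],
                  uint8_t        mapper[STASH_SIZE])
{
    for (int q_slot = 0; q_slot < STASH_SIZE; ++q_slot) {
        const uint32_t k_patch = q_stash[q_slot];
        if (k_patch == p_id) {
            mapper[q_slot] = INVALID8 - 1u;
        } else if (k_patch != INVALID32) {
            mapper[q_slot] = find_index(p_stash, k_patch);
        } else {
            mapper[q_slot] = INVALID8;
        }
    }
}

int main()
{
    const uint32_t P_ID = 7;  // hypothetical id of patch p

    // q's neighbors: p itself (7), patch 3 (also a neighbor of p), patch 9 (unknown to p).
    const uint32_t q_stash[STASH_SIZE] = {
        7, 3, 9, INVALID32, INVALID32, INVALID32, INVALID32, INVALID32};
    // p's neighbors.
    const uint32_t p_stash[STASH_SIZE] = {
        2, 3, 5, INVALID32, INVALID32, INVALID32, INVALID32, INVALID32};

    uint8_t mapper[STASH_SIZE];
    build_mapper(P_ID, q_stash, p_stash, mapper);

    // Expected: slot 0 -> 254 (p itself), slot 1 -> 1 (patch 3 sits at p_stash[1]),
    // slot 2 -> 255 (patch 9 is not a neighbor of p), remaining slots -> 255.
    for (int i = 0; i < STASH_SIZE; ++i) {
        printf("q stash slot %d -> %d\n", i, static_cast<int>(mapper[i]));
    }
    return 0;
}

Building the table once per block is the point of the patch: the correspondence loops can then translate each not-owned element's owner with a single array lookup instead of calling find_patch_index per element.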