Skip to content

Commit

Permalink
patch stash mapper in cavity
Browse files Browse the repository at this point in the history
  • Loading branch information
Ahdhn committed Dec 11, 2024
1 parent 8f2c29a commit cb28233
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 70 deletions.
18 changes: 16 additions & 2 deletions include/rxmesh/cavity_manager.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,7 @@ struct CavityManager

/**
* @brief return the number of elements as stored in shared memory based on
* template paramter
* template parameter
*/
template <typename HandleT>
__device__ __inline__ uint32_t get_num_elements()
Expand Down Expand Up @@ -870,6 +870,15 @@ struct CavityManager
}
}

/**
* @brief build patch stash mapper that maps q's patch stash index to p's
* patch stash index
*/
__device__ __inline__ void build_patch_stash_mapper(
cooperative_groups::thread_block& block,
const PatchInfo& q_patch_info);


// indicate if this block can write its updates to global memory during
// epilogue
bool m_write_to_gmem;
Expand All @@ -879,7 +888,7 @@ struct CavityManager

// the prefix sum of the cavities sizes. the size of the cavity is the
// number of boundary edges in the cavity
// this also could have be uint16_t but we use itn since we do atomicAdd on
// this also could have been uint16_t but we use int since we do atomicAdd on
// it
int* m_s_cavity_size_prefix;

Expand Down Expand Up @@ -981,6 +990,11 @@ struct CavityManager
// patch stash stored in shared memory
PatchStash m_s_patch_stash;

// indexed by q's patch stash id and returns the corresponding p's patch
// stash id. if the patch corresponds to p itself, we store INVALID8-1
// if the patch does not exist in p's patch stash, we store INVALID8
uint8_t* m_s_patch_stash_mapper;

PatchInfo m_patch_info;
Context m_context;

Expand Down
146 changes: 84 additions & 62 deletions include/rxmesh/cavity_manager_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,9 @@ CavityManager<blockThreads, cop>::alloc_shared_memory(
m_s_patch_stash.m_stash[i] = m_patch_info.patch_stash.m_stash[i];
}

__shared__ uint8_t p_st2[PatchStash::stash_size];
m_s_patch_stash_mapper = p_st2;

// cavity prefix sum
// this assertion is because when we allocated dynamic shared memory
// during kernel launch we assumed the number of cavities is at most
Expand Down Expand Up @@ -2703,15 +2706,6 @@ CavityManager<blockThreads, cop>::soft_migrate_from_patch(
if (!inserted) {
m_s_should_slice[0] = true;
}
// if (!inserted) {
// printf("\n p= %u, load factor = %f, stash load factor =
// %f",
// patch_id(),
// m_patch_info.lp_v.compute_load_factor(m_s_table_v),
// m_patch_info.lp_v.compute_load_factor(
// m_s_table_stash_v));
// }
// assert(inserted);
}
block.sync();
}
Expand Down Expand Up @@ -3276,7 +3270,7 @@ __device__ __inline__ LPPair CavityManager<blockThreads, cop>::migrate_vertex(
assert(owner_stash_id < m_s_patches_to_lock_mask.size());
m_s_patches_to_lock_mask.set(owner_stash_id, true);
} else if (o != q && o != m_patch_info.patch_id &&
o_stash != INVALID4) {
o_stash != INVALID8) {
assert(o_stash != INVALID8);
assert(o_stash < m_s_patches_to_lock_mask.size());
m_s_patches_to_lock_mask.set(o_stash, true);
Expand Down Expand Up @@ -3418,7 +3412,7 @@ __device__ __inline__ LPPair CavityManager<blockThreads, cop>::migrate_edge(
assert(owner_stash_id < m_s_patches_to_lock_mask.size());
m_s_patches_to_lock_mask.set(owner_stash_id, true);
} else if (o != q && o != m_patch_info.patch_id &&
o_stash != INVALID4) {
o_stash != INVALID8) {
assert(o_stash != INVALID8);
assert(o_stash < m_s_patches_to_lock_mask.size());
m_s_patches_to_lock_mask.set(o_stash, true);
Expand Down Expand Up @@ -3520,7 +3514,7 @@ __device__ __inline__ LPPair CavityManager<blockThreads, cop>::migrate_face(
assert(owner_stash_id < m_s_patches_to_lock_mask.size());
m_s_patches_to_lock_mask.set(owner_stash_id, true);
} else if (o != q && o != m_patch_info.patch_id &&
o_stash != INVALID4) {
o_stash != INVALID8) {
assert(o_stash != INVALID8);
assert(o_stash < m_s_patches_to_lock_mask.size());
m_s_patches_to_lock_mask.set(o_stash, true);
Expand Down Expand Up @@ -3723,11 +3717,25 @@ CavityManager<blockThreads, cop>::populate_correspondence(
}


// build patch stash mapper
build_patch_stash_mapper(block, q_patch_info);
block.sync();


// for other not-owned elements in q, we store the local id in the owner
// patch and patch stash id (in q patch stash). When we store the local id
// in the owner patch, we set the high bit to one to mark these elements
// differently for the next for loop, where we check if these elements
// exists in p

// The q's not-owned element could be
// 1) don't exist at all in p.
// 2) elements where the owner patch is p.
// 3) elements where the owner patch is k and these
// elements have a copy in p. This will be handle in the next for loop
//
// To handle 1) and 3), we have to convert q's patch stash id to p's patch
// stash id and this is what we do in this for loop as well
int q_num_elements = q_patch_info.get_num_elements<HandleT>()[0];
assert(q_num_elements <= s_correspondence_size);

Expand All @@ -3740,65 +3748,53 @@ CavityManager<blockThreads, cop>::populate_correspondence(

assert(s_correspondence[b] == INVALID16);
assert(lp.patch_stash_id() < PatchStash::stash_size);
assert(lp.patch_stash_id() != INVALID8);


// set the high bit to one
uint16_t s = lp.local_id_in_owner_patch();
s |= (1 << 15);

s_correspondence[b] = s;

// patch stash id in q's patch stash
s_correspondence_stash[b] = lp.patch_stash_id();

// TODO assert that this entry is actually owned by the other patch
}
}
block.sync();


// The for loop above could result into q's not-owned element that are
// 1) don't exists at all in p.
// 2) elements where the owner patch is p. This will
// handle in this for loop
// 3) elements where the owner patch is k and these
// elements have a copy in p. This will be handle in the next for loop
//
// To handle 1) and 3), we have to convert q's patch stash id to p's patch
// stash id and this is what we do in this for loop
// it might sound wise to assert that the k_patch (which is the
// patch point to by lp) is the owning patch of s. But, we are not
// even sure if we are going to use s at all. Also, later on, we
// check if k_patch is the owner (ensure_ownership) and if it is
// dirty in migrate() However, in there, we do this check on the
// patches that we are sure that we are going to change their
// ownership.
//
// const uint32_t k_patch =
// q_patch_info.patch_stash.get_patch(lp.patch_stash_id());
// assert(m_context.m_patches_info[k_patch].is_owned(LocalT(s)));

for (int b = threadIdx.x; b < q_num_elements; b += blockThreads) {
// if the highest bit is set
const uint16_t corres = s_correspondence[b];
if (corres != INVALID16 && (corres & (1 << 15))) {
const uint8_t q_stash_id = s_correspondence_stash[b];
const uint32_t k_patch =
q_patch_info.patch_stash.get_patch(q_stash_id);

assert(k_patch != INVALID32);
// patch stash id in q's patch stash
const uint8_t p_stash_id =
m_s_patch_stash_mapper[lp.patch_stash_id()];

// Case 2
if (k_patch == patch_id()) {
// clear the high bit
s_correspondence[b] = corres & ~(1 << 15);
if (p_stash_id == INVALID8 - 1u) {
s_correspondence_stash[b] = INVALID8;
continue;
}

// Case 1, early exit, i.e., if the k patch is not in p's patch
// stash, then the element is for sure not in p at all
} else {

const uint8_t p_stash_id =
m_s_patch_stash.find_patch_index(k_patch);
// Case 1, early exit, i.e., if the owner patch is not in p's
// patch stash, then the element is for sure not in p at all

// Could be either INVALID8 or the actual patch stash ID in p's
// patch stash
s_correspondence_stash[b] = p_stash_id;
// Could be either INVALID8 or the actual patch stash ID in p's
// patch stash
s_correspondence_stash[b] = p_stash_id;

if (p_stash_id == INVALID8) {
// if this patch is not in p, then reset this correspondence
// entry
s_correspondence[b] = INVALID16;
if (p_stash_id == INVALID8) {
// if this patch is not in p, then reset this correspondence
// entry
s = INVALID16;
} else {
// otherwise, mark this element by setting its highest bit
s |= (1 << 15);
}
}

s_correspondence[b] = s;
}
}
block.sync();
Expand All @@ -3807,14 +3803,15 @@ CavityManager<blockThreads, cop>::populate_correspondence(
// Now, in s_correspondence, there are some elements that are stored where
// we have the local index in the owner patch and the patch stash in p.
// some of these elements have a corresponding copy in p and we want to
// replace thier entries with the local index in p. Of course these are
// replace their entries with the local index in p. Of course these are
// not-owned elements in p. So, there are different way of doing this.
// 1) outer for loop on the correspondance, and inner loop on p's table and
// 1) outer for loop on the correspondence, and inner loop on p's table and
// p's table stash
// 2) outer loop on p's not-owned elements and find the LPPair, then inner
// for loop in the correspondance
// for loop in the correspondence
// We go with 2) since we "probably" have a smaller number of
// correspondance to search through than number of not-owned elements
// correspondence to search through than number of not-owned elements


const int num_elements = get_num_elements<HandleT>();

Expand Down Expand Up @@ -3845,7 +3842,7 @@ CavityManager<blockThreads, cop>::populate_correspondence(

block.sync();

// finally reset the correspondance element where their high bit is set to
// finally reset the correspondence element where their high bit is set to
// one since these elements are not-owned in q but they don't appear in p
for (int b = threadIdx.x; b < q_num_elements; b += blockThreads) {
// if the highest bit is set
Expand All @@ -3858,6 +3855,31 @@ CavityManager<blockThreads, cop>::populate_correspondence(
}


template <uint32_t blockThreads, CavityOp cop>
__device__ __inline__ void
CavityManager<blockThreads, cop>::build_patch_stash_mapper(
    cooperative_groups::thread_block& block,
    const PatchInfo&                  q_patch_info)
{
    // Build the mapper that translates q's patch stash indices into p's
    // patch stash indices. For every slot in q's patch stash we store:
    //   - INVALID8 - 1 if the referenced patch is p itself,
    //   - p's stash index if the patch also appears in p's patch stash,
    //   - INVALID8 if the slot is empty or the patch is absent from p's
    //     stash (find_patch_index returns INVALID8 in that case).
    // The caller is expected to block.sync() before reading the mapper.
    for (int q_stash_id = threadIdx.x; q_stash_id < PatchStash::stash_size;
         q_stash_id += blockThreads) {

        const uint32_t k_patch = q_patch_info.patch_stash.get_patch(q_stash_id);

        if (k_patch == patch_id()) {
            // BUG FIX: this test must come before the INVALID32 test.
            // patch_id() is a valid patch id (!= INVALID32), so with the
            // previous ordering this branch was unreachable and p itself
            // was looked up in its own stash (yielding INVALID8, i.e.
            // "not in p") instead of the INVALID8 - 1 sentinel that
            // populate_correspondence checks for.
            m_s_patch_stash_mapper[q_stash_id] = INVALID8 - 1u;
        } else if (k_patch != INVALID32) {
            // k is a neighbor patch of q; map it through p's stash.
            // Could be INVALID8 if k is not a neighbor of p.
            const uint8_t p_stash_id =
                m_s_patch_stash.find_patch_index(k_patch);
            m_s_patch_stash_mapper[q_stash_id] = p_stash_id;
        } else {
            // empty stash slot in q
            m_s_patch_stash_mapper[q_stash_id] = INVALID8;
        }
    }
}

template <uint32_t blockThreads, CavityOp cop>
template <typename HandleT>
__device__ __inline__ bool CavityManager<blockThreads, cop>::ensure_ownership(
Expand Down
12 changes: 6 additions & 6 deletions include/rxmesh/patch_lock.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ namespace rxmesh {
*/
struct PatchLock
{
__device__ __host__ PatchLock() : lock(nullptr), spin(nullptr){};
__device__ __host__ PatchLock(const PatchLock& other) = default;
__device__ __host__ PatchLock(PatchLock&&) = default;
__device__ __host__ PatchLock() : lock(nullptr), spin(nullptr) {};
__device__ __host__ PatchLock(const PatchLock& other) = default;
__device__ __host__ PatchLock(PatchLock&&) = default;
__device__ __host__ PatchLock& operator=(const PatchLock&) = default;
__device__ __host__ PatchLock& operator=(PatchLock&&) = default;
__device__ __host__ ~PatchLock() = default;
__device__ __host__ PatchLock& operator=(PatchLock&&) = default;
__device__ __host__ ~PatchLock() = default;


/**
Expand Down Expand Up @@ -58,7 +58,7 @@ struct PatchLock
/**
* @brief check if the patch is locked
*/
__device__ bool is_locked()
__device__ bool is_locked() const
{
#ifdef __CUDA_ARCH__
return atomic_read(lock) == LOCKED;
Expand Down

0 comments on commit cb28233

Please sign in to comment.