Skip to content

Commit

Permalink
Merge pull request #4 from Elephantusparvus/feature/objectwise_covs
Browse files Browse the repository at this point in the history
adds objectwise stats limited to 2D region for multiobject scenes.
  • Loading branch information
amock authored Jan 21, 2025
2 parents 4d594f7 + 46a47aa commit 6b0fc35
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 0 deletions.
23 changes: 23 additions & 0 deletions src/rmagine_cuda/include/rmagine/math/statistics.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,29 @@ CrossStatistics statistics_p2l(
const PointCloudView_<VRAM_CUDA>& model,
const UmeyamaReductionConstraints params);

/**
 * @brief Object-wise point-to-plane (P2L) cross statistics, all per-object
 *        arrays residing in CUDA device memory.
 *
 * One CrossStatistics entry is produced per element of @p stats. For output
 * i the i-th entries of @p pre_transform, @p params and @p bboxes are used,
 * so those three views need at least stats.size() elements.
 *
 * @param pre_transform  per-object transform applied to each dataset point
 *                       before it is compared against the model
 * @param dataset        dataset point cloud (points, optional mask / ids)
 * @param model          model point cloud (points, normals, optional mask / ids),
 *                       indexed with the same linear index as @p dataset
 * @param width          row stride (in elements) of the 2D layout of the buffers
 * @param height         number of rows of the 2D layout
 * @param params         per-object reduction constraints (max distance, ids)
 * @param bboxes         per-object 2D search regions; components 0 / 1 of
 *                       min and max are read as column / row
 * @param stats          output, one entry per object
 */
void statistics_objectwise_p2l(
const MemoryView<Transform, VRAM_CUDA>& pre_transform,
const PointCloudView_<VRAM_CUDA>& dataset,
const PointCloudView_<VRAM_CUDA>& model,
const unsigned int& width,
const unsigned int& height,
const MemoryView<UmeyamaReductionConstraints, VRAM_CUDA>& params,
const MemoryView<AABB, VRAM_CUDA>& bboxes,
MemoryView<CrossStatistics, VRAM_CUDA>& stats);


/**
 * @brief Object-wise point-to-plane (P2L) cross statistics, per-object arrays
 *        residing in host memory.
 *
 * Convenience overload: the per-object inputs and the output live in RAM
 * while the point clouds stay in CUDA device memory. The host arrays are
 * copied to the device, the device overload is invoked, and the results are
 * copied back into @p stats.
 *
 * @param pre_transforms per-object transform applied to each dataset point
 * @param dataset        dataset point cloud in device memory
 * @param model          model point cloud in device memory
 * @param width          row stride (in elements) of the 2D layout of the buffers
 * @param height         number of rows of the 2D layout
 * @param params         per-object reduction constraints
 * @param bboxes         per-object 2D search regions
 * @param stats          output, one entry per object; overwritten with the
 *                       results downloaded from the device
 */
void statistics_objectwise_p2l(
const MemoryView<Transform, RAM>& pre_transforms,
const PointCloudView_<VRAM_CUDA>& dataset,
const PointCloudView_<VRAM_CUDA>& model,
const unsigned int& width,
const unsigned int& height,
const MemoryView<UmeyamaReductionConstraints, RAM>& params,
const MemoryView<AABB, RAM>& bboxes,
MemoryView<CrossStatistics, RAM>& stats);




} // namespace rmagine

Expand Down
143 changes: 143 additions & 0 deletions src/rmagine_cuda/src/math/statistics.cu
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,101 @@ __global__ void statistics_p2l_kernel(
}
}

/**
 * @brief Per-object point-to-plane (P2L) correspondence statistics.
 *
 * Launch layout: 1D grid, one block per object. Block b reads bboxes[b],
 * params[b] and pre_transforms[b] and writes exactly one result to res[b].
 *
 * Preconditions:
 *  - blockDim.x == nMemElems (one shared-memory slot per thread)
 *  - nMemElems is a power of two (the tree reduction halves the range)
 *  - all point / normal / mask / id buffers share one linear index and hold
 *    at least N elements; mask and id pointers may be NULL to disable the
 *    respective filter
 *
 * The bounding box is interpreted on a 2D layout with row stride `width`:
 * component 0 of min / max is the column, component 1 the row, and max is
 * treated as exclusive. The box values are converted to unsigned int.
 * `height` is currently not used by the kernel.
 *
 * Static shared memory: nMemElems * sizeof(CrossStatistics) bytes.
 */
template<unsigned int nMemElems>
__global__ void statistics_objectwise_p2l_kernel(
    const Vector* dataset_points,
    const uint8_t* dataset_mask,
    const unsigned int* dataset_ids,
    const uint32_t width,
    const uint32_t height,
    const Transform* pre_transforms,
    const Vector* model_points,
    const Vector* model_normals,
    const uint8_t* model_mask,
    const unsigned int* model_ids,
    const UmeyamaReductionConstraints* params,
    const AABB* bboxes,
    unsigned int N,
    CrossStatistics* res)
{
    // nMemElems == num_threads!!!!
    __shared__ CrossStatistics sdata[nMemElems];

    const unsigned int tid = threadIdx.x;
    const unsigned int bid = blockIdx.x;
    const unsigned int n_threads = blockDim.x;

    const AABB bb = bboxes[bid];
    const unsigned int min_col = bb.min[0];
    const unsigned int min_row = bb.min[1];
    const unsigned int max_col = bb.max[0];
    const unsigned int max_row = bb.max[1];

    // An inverted box (max < min) would underflow the unsigned subtraction
    // and yield a gigantic region; treat it as empty instead.
    const unsigned int bb_width  = (max_col > min_col) ? (max_col - min_col) : 0u;
    const unsigned int bb_height = (max_row > min_row) ? (max_row - min_row) : 0u;

    // linear index of the top-left pixel of the box
    const unsigned int start_idx = min_row * width + min_col;
    const unsigned int n_elems = bb_width * bb_height;
    // each thread handles a contiguous chunk of the box (ceil division)
    const unsigned int n_elems_thread = (n_elems + n_threads - 1) / n_threads;

    const Transform pre_transform = pre_transforms[bid];
    const UmeyamaReductionConstraints param = params[bid];

    sdata[tid] = CrossStatistics::Identity();

    const unsigned int first_idx = tid * n_elems_thread;
    for(unsigned int i = 0; i < n_elems_thread; i++)
    {
        const unsigned int idx = first_idx + i;
        if(idx >= n_elems)
        {
            // The ceil division hands out up to n_threads - 1 more slots than
            // the box contains. Without this bound the trailing threads would
            // accumulate pixels from rows below the bounding box.
            break;
        }

        // idx < n_elems implies bb_width > 0, so the divisions are safe
        const unsigned int row = idx / bb_width;
        const unsigned int col = idx % bb_width;
        // width is the stride from row to row.
        const unsigned int data_id = start_idx + row * width + col;

        if(data_id < N)
        {
            if(    (dataset_mask == NULL || dataset_mask[data_id] > 0)
                && (model_mask == NULL   || model_mask[data_id] > 0)
                && (dataset_ids == NULL  || dataset_ids[data_id] == param.dataset_id)
                && (model_ids == NULL    || model_ids[data_id] == param.model_id)
                )
            {
                const Vector Di = pre_transform * dataset_points[data_id]; // read
                const Vector Ii = model_points[data_id]; // read
                const Vector Ni = model_normals[data_id];

                // signed distance of Di to the plane through Ii with normal Ni
                const float signed_plane_dist = (Ii - Di).dot(Ni);

                if(fabsf(signed_plane_dist) < param.max_dist)
                {
                    // nearest point on model
                    const Vector Mi = Di + Ni * signed_plane_dist;
                    // add Di -> Mi correspondence
                    sdata[tid] += CrossStatistics::Init(Di, Mi);
                }
            }
        }
    }

    // every partial result must be visible before the reduction reads it
    __syncthreads();

    // block-wide tree reduction in shared memory; the barrier sits outside
    // the branch so that all threads of the block reach it
    for(unsigned int s = nMemElems / 2; s > 0; s >>= 1)
    {
        if(tid < s)
        {
            sdata[tid] += sdata[tid + s];
        }
        __syncthreads();
    }

    if(tid == 0)
    {
        // printf("num_valid %i", sdata[0].n_meas);
        res[bid] = sdata[0];
    }
}


void statistics_p2l(
const Transform& pre_transform,
Expand Down Expand Up @@ -324,4 +419,52 @@ CrossStatistics statistics_p2l(
return ret;
}

/**
 * Object-wise P2L statistics with all per-object arrays in device memory.
 *
 * Launches one block of 512 threads per entry of `stats`; block i evaluates
 * the 2D region bboxes[i] with pre_transform[i] and params[i] and writes
 * stats[i]. `pre_transform`, `params` and `bboxes` must therefore hold at
 * least stats.size() elements. The launch is asynchronous; no
 * synchronization or error query is performed here.
 */
void statistics_objectwise_p2l(
    const MemoryView<Transform, VRAM_CUDA>& pre_transform,
    const PointCloudView_<VRAM_CUDA>& dataset,
    const PointCloudView_<VRAM_CUDA>& model,
    const unsigned int& width,
    const unsigned int& height,
    const MemoryView<UmeyamaReductionConstraints, VRAM_CUDA>& params,
    const MemoryView<AABB, VRAM_CUDA>& bboxes,
    MemoryView<CrossStatistics, VRAM_CUDA>& stats)
{
    const unsigned int n_outputs = stats.size(); // also number of blocks
    if(n_outputs == 0)
    {
        // A grid of zero blocks is an invalid launch configuration and would
        // leave a CUDA error behind; nothing to compute anyway.
        return;
    }

    constexpr unsigned int n_threads = 512; // also shared mem

    // The kernel reads dataset and model buffers with the same linear index,
    // so the valid index range is bounded by the smaller of the two clouds.
    const unsigned int n_dataset = dataset.points.size();
    const unsigned int n_model = model.points.size();
    const unsigned int n_points = (n_model < n_dataset) ? n_model : n_dataset;

    statistics_objectwise_p2l_kernel<n_threads> <<<n_outputs, n_threads>>>(
        dataset.points.raw(), dataset.mask.raw(), dataset.ids.raw(),
        width, height,
        pre_transform.raw(),
        model.points.raw(), model.normals.raw(), model.mask.raw(), model.ids.raw(),
        params.raw(),
        bboxes.raw(),
        n_points,
        stats.raw()
    );
}

/**
 * Object-wise P2L statistics with the per-object arrays in host memory.
 *
 * Stages the host-side arrays on the GPU, runs the device overload and
 * copies the resulting statistics back into `stats`.
 */
void statistics_objectwise_p2l(
    const MemoryView<Transform, RAM>& pre_transforms,
    const PointCloudView_<VRAM_CUDA>& dataset,
    const PointCloudView_<VRAM_CUDA>& model,
    const unsigned int& width,
    const unsigned int& height,
    const MemoryView<UmeyamaReductionConstraints, RAM>& params,
    const MemoryView<AABB, RAM>& bboxes,
    MemoryView<CrossStatistics, RAM>& stats)
{
    // host -> device copies of the per-object inputs
    Memory<AABB, VRAM_CUDA> d_bboxes = bboxes;
    Memory<UmeyamaReductionConstraints, VRAM_CUDA> d_params = params;
    Memory<Transform, VRAM_CUDA> d_pre_transforms = pre_transforms;

    // device-side buffer the results are written to
    Memory<CrossStatistics, VRAM_CUDA> d_stats = stats;

    // delegate to the overload operating purely on device memory
    statistics_objectwise_p2l(
        d_pre_transforms, dataset, model,
        width, height,
        d_params, d_bboxes, d_stats);

    // device -> host: update the caller's view with the computed statistics
    stats = d_stats;
}

} // namespace rmagine

0 comments on commit 6b0fc35

Please sign in to comment.