// Patch summary: adds an object-wise point-to-plane (P2L) statistics reduction.
//
// statistics.cuh: declares two overloads of statistics_objectwise_p2l(...).
// statistics.cu : adds statistics_objectwise_p2l_kernel plus the two host overloads.
//
// Kernel (statistics_objectwise_p2l_kernel), as far as the visible text shows:
//   - Each block reads its own entry via blockIdx.x: bboxes[bid], pre_transforms[bid],
//     params[bid], and thread 0 writes the block result to res[bid].
//   - The bounding box is interpreted as (min_col, min_row) .. (max_col, max_row);
//     bb_width * bb_height elements are split into ceil(n_elems / blockDim.x)
//     elements per thread.
//   - A correspondence is accepted when the optional masks are > 0, the optional ids
//     equal param.dataset_id / param.model_id (a NULL pointer disables that filter),
//     and fabs((Ii - Di).dot(Ni)) < param.max_dist. Di is the pre-transformed dataset
//     point; Mi = Di + Ni * signed_plane_dist is accumulated with
//     CrossStatistics::Init(Di, Mi) into sdata[tid].
//   - After __syncthreads(), sdata is folded pairwise starting at nMemElems / 2 and
//     halving each step. This appears to require blockDim.x == nMemElems (see the
//     in-code comment) and a power-of-two nMemElems.
//
// Host overloads:
//   - The first uses stats.size() as the number of blocks (per its own comment) and a
//     constexpr 512 threads, then launches the kernel with the raw pointers.
//   - The second copies stats, pre_transforms, params and bboxes into *_gpu Memory
//     objects, calls the first overload, and assigns stats_gpu back to stats.
//
// NOTE(review): this patch text looks damaged. Everything between '<' and '>' seems
// to have been stripped: "template" has no parameter list, MemoryView / PointCloudView_
// / Memory have no template arguments, the launch reads "<<>>", and the kernel's loop
// header plus index computation are fused into "for(unsigned int i=0; i 0)". Restore
// from the upstream commit before applying - TODO confirm.
// NOTE(review): in the visible text, the local cross_stats is initialised but only
// referenced from a commented-out line, and the kernel parameter N is never read, so
// no bound check of data_id against the point count is visible - verify upstream.
// NOTE(review): no error query (e.g. cudaGetLastError) is visible after the kernel
// launch - confirm whether the project checks launches elsewhere.
diff --git a/src/rmagine_cuda/include/rmagine/math/statistics.cuh b/src/rmagine_cuda/include/rmagine/math/statistics.cuh index d37ce9b..3cdf32a 100644 --- a/src/rmagine_cuda/include/rmagine/math/statistics.cuh +++ b/src/rmagine_cuda/include/rmagine/math/statistics.cuh @@ -136,6 +136,29 @@ CrossStatistics statistics_p2l( const PointCloudView_& model, const UmeyamaReductionConstraints params); +void statistics_objectwise_p2l( + const MemoryView& pre_transform, + const PointCloudView_& dataset, + const PointCloudView_& model, + const unsigned int& width, + const unsigned int& height, + const MemoryView& params, + const MemoryView& bboxes, + MemoryView& stats); + + +void statistics_objectwise_p2l( + const MemoryView& pre_transforms, + const PointCloudView_& dataset, + const PointCloudView_& model, + const unsigned int& width, + const unsigned int& height, + const MemoryView& params, + const MemoryView& bboxes, + MemoryView& stats); + + + } // namespace rmagine diff --git a/src/rmagine_cuda/src/math/statistics.cu b/src/rmagine_cuda/src/math/statistics.cu index ad0ab8d..bac1abb 100644 --- a/src/rmagine_cuda/src/math/statistics.cu +++ b/src/rmagine_cuda/src/math/statistics.cu @@ -275,6 +275,101 @@ __global__ void statistics_p2l_kernel( } } +template +__global__ void statistics_objectwise_p2l_kernel( + const Vector* dataset_points, + const uint8_t* dataset_mask, + const unsigned int* dataset_ids, + const uint32_t width, + const uint32_t height, + const Transform* pre_transforms, + const Vector* model_points, + const Vector* model_normals, + const uint8_t* model_mask, + const unsigned int* model_ids, + const UmeyamaReductionConstraints* params, + const AABB* bboxes, + unsigned int N, + CrossStatistics* res) +{ + + // nMemElems == num_threads!!!! 
+ __shared__ CrossStatistics sdata[nMemElems]; + + const unsigned int tid = threadIdx.x; + const unsigned int bid = blockIdx.x; + const unsigned int n_threads = blockDim.x; + + const AABB bb = bboxes[bid]; + const unsigned int min_col = bb.min[0]; + const unsigned int min_row = bb.min[1]; + const unsigned int max_col = bb.max[0]; + const unsigned int max_row = bb.max[1]; + const unsigned int bb_width = max_col - min_col; + const unsigned int bb_height = max_row - min_row; + + const unsigned int start_idx = min_row * width + min_col; + const unsigned int n_elems = bb_width * bb_height; + const unsigned int n_elems_thread = (n_elems + n_threads - 1) / n_threads; + + const Transform pre_transform = pre_transforms[bid]; + + CrossStatistics cross_stats = CrossStatistics::Identity(); + sdata[tid] = CrossStatistics::Identity(); + const UmeyamaReductionConstraints param = params[bid]; + + unsigned int t_idx = tid * n_elems_thread; + for(unsigned int i=0; i 0) + && (model_mask == NULL || model_mask[data_id] > 0) + && (dataset_ids == NULL || dataset_ids[data_id] == param.dataset_id) + && (model_ids == NULL || model_ids[data_id] == param.model_id) + ) + { + const Vector Di = pre_transform * dataset_points[data_id]; // read + const Vector Ii = model_points[data_id]; // read + const Vector Ni = model_normals[data_id]; + + const float signed_plane_dist = (Ii - Di).dot(Ni); + + if(fabs(signed_plane_dist) < param.max_dist) + { + // nearest point on model + const Vector Mi = Di + Ni * signed_plane_dist; + // add Di -> Mi correspondence + sdata[tid] += CrossStatistics::Init(Di, Mi); + } + } + } + } + // sdata[tid] = cross_stats; + __syncthreads(); + + for(unsigned int s = nMemElems / 2; s > 0; s >>= 1) + { + if(tid < s) + { + sdata[tid] += sdata[tid + s]; + } + __syncthreads(); + } + + if(tid == 0) + { + // printf("num_valid %i", sdata[0].n_meas); + res[bid] = sdata[0]; + } +} + void statistics_p2l( const Transform& pre_transform, @@ -324,4 +419,52 @@ CrossStatistics statistics_p2l( 
return ret; } +void statistics_objectwise_p2l( + const MemoryView& pre_transform, + const PointCloudView_& dataset, + const PointCloudView_& model, + const unsigned int& width, + const unsigned int& height, + const MemoryView& params, + const MemoryView& bboxes, + MemoryView& stats) +{ + const unsigned int n_outputs = stats.size(); // also number of blocks + constexpr unsigned int n_threads = 512; // also shared mem + + statistics_objectwise_p2l_kernel <<>>( + dataset.points.raw(), dataset.mask.raw(), dataset.ids.raw(), + width, height, + pre_transform.raw(), + model.points.raw(), model.normals.raw(), model.mask.raw(), model.ids.raw(), + params.raw(), + bboxes.raw(), + dataset.points.size(), + stats.raw() + ); +} + +void statistics_objectwise_p2l( + const MemoryView& pre_transforms, + const PointCloudView_& dataset, + const PointCloudView_& model, + const unsigned int& width, + const unsigned int& height, + const MemoryView& params, + const MemoryView& bboxes, + MemoryView& stats) +{ + // Upload it to GPU + Memory stats_gpu = stats; + Memory pre_transforms_gpu = pre_transforms; + Memory params_gpu = params; + Memory bboxes_gpu = bboxes; + + // to write results to it + statistics_objectwise_p2l(pre_transforms_gpu, dataset, model, width, height, + params_gpu, bboxes_gpu, stats_gpu); + // download to view and therefore update 'stats' with it + stats = stats_gpu; +} + } // namespace rmagine \ No newline at end of file