-
Notifications
You must be signed in to change notification settings - Fork 35
/
Copy pathPixelRecHits.cu
78 lines (63 loc) · 2.67 KB
/
PixelRecHits.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
// C++ headers
#include <algorithm>
#include <numeric>
// CUDA runtime
#include <cuda_runtime.h>
// CMSSW headers
#include "CUDACore/cudaCheck.h"
#include "CUDACore/device_unique_ptr.h"
#include "plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.h" // !
#include "plugin-SiPixelClusterizer/gpuClusteringConstants.h" // !
#include "PixelRecHits.h"
#include "gpuPixelRecHits.h"
namespace {
__global__ void setHitsLayerStart(uint32_t const* __restrict__ hitsModuleStart,
pixelCPEforGPU::ParamsOnGPU const* cpeParams,
uint32_t* hitsLayerStart) {
auto i = blockIdx.x * blockDim.x + threadIdx.x;
assert(0 == hitsModuleStart[0]);
if (i < 11) {
hitsLayerStart[i] = hitsModuleStart[cpeParams->layerGeometry().layerStart[i]];
#ifdef GPU_DEBUG
printf("LayerStart %d %d: %d\n", i, cpeParams->layerGeometry().layerStart[i], hitsLayerStart[i]);
#endif
}
}
} // namespace
namespace pixelgpudetails {
TrackingRecHit2DCUDA PixelRecHitGPUKernel::makeHitsAsync(SiPixelDigisCUDA const& digis_d,
SiPixelClustersCUDA const& clusters_d,
BeamSpotCUDA const& bs_d,
pixelCPEforGPU::ParamsOnGPU const* cpeParams,
cudaStream_t stream) const {
auto nHits = clusters_d.nClusters();
TrackingRecHit2DCUDA hits_d(nHits, cpeParams, clusters_d.clusModuleStart(), stream);
int threadsPerBlock = 128;
int blocks = digis_d.nModules(); // active modules (with digis)
#ifdef GPU_DEBUG
std::cout << "launching getHits kernel for " << blocks << " blocks" << std::endl;
#endif
if (blocks) // protect from empty events
gpuPixelRecHits::getHits<<<blocks, threadsPerBlock, 0, stream>>>(
cpeParams, bs_d.data(), digis_d.view(), digis_d.nDigis(), clusters_d.view(), hits_d.view());
cudaCheck(cudaGetLastError());
#ifdef GPU_DEBUG
cudaDeviceSynchronize();
cudaCheck(cudaGetLastError());
#endif
// assuming full warp of threads is better than a smaller number...
if (nHits) {
setHitsLayerStart<<<1, 32, 0, stream>>>(clusters_d.clusModuleStart(), cpeParams, hits_d.hitsLayerStart());
cudaCheck(cudaGetLastError());
}
if (nHits) {
cms::cuda::fillManyFromVector(hits_d.phiBinner(), 10, hits_d.iphi(), hits_d.hitsLayerStart(), nHits, 256, stream);
cudaCheck(cudaGetLastError());
}
#ifdef GPU_DEBUG
cudaDeviceSynchronize();
cudaCheck(cudaGetLastError());
#endif
return hits_d;
}
} // namespace pixelgpudetails