Skip to content

Commit

Permalink
[alpaka] format code
Browse files Browse the repository at this point in the history
  • Loading branch information
antoniopetre committed Sep 2, 2021
1 parent cae4cfd commit 1667920
Show file tree
Hide file tree
Showing 12 changed files with 258 additions and 336 deletions.
6 changes: 3 additions & 3 deletions src/alpaka/AlpakaCore/HistoContainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ namespace cms {
const Vec1 blocksPerGrid(nblocks);

auto d_pc = cms::alpakatools::allocDeviceBuf<int32_t>(1u);
int32_t* pc = alpaka::getPtrNative(d_pc);
int32_t *pc = alpaka::getPtrNative(d_pc);
alpaka::memset(queue, d_pc, 0, 1u);

const WorkDiv1 &workDiv = cms::alpakatools::make_workdiv(blocksPerGrid, threadsPerBlockOrElementsPerThread);
Expand Down Expand Up @@ -248,7 +248,7 @@ namespace cms {
off[nbins()] = uint32_t(off[nbins() - 1]);
return;
}

cms::alpakatools::for_each_element_in_grid_strided(acc, totbins(), m, [&](uint32_t i) { off[i] = n; });
}

Expand All @@ -262,7 +262,7 @@ namespace cms {
template <typename T_Acc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const T_Acc &acc, T t, index_type j) {
uint32_t b = bin(t);
ALPAKA_ASSERT_OFFLOAD(b < nbins());
ALPAKA_ASSERT_OFFLOAD(b < nbins());
auto w = atomicDecrement(acc, off[b]);
ALPAKA_ASSERT_OFFLOAD(w > 0);
bins[w - 1] = j;
Expand Down
52 changes: 12 additions & 40 deletions src/alpaka/AlpakaCore/alpakaWorkDivHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,18 +326,16 @@ namespace cms {
template <typename T, typename T_Acc>
class elements_with_stride {
public:

ALPAKA_FN_ACC elements_with_stride(const T_Acc& acc,
T extent,
Idx elementIdxShift = 0,
const unsigned int dimIndex = 0) {

const Idx threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[dimIndex]);
const Idx blockIdxInGrid(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[dimIndex]);

const Idx blockDimension(alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[dimIndex]);
const Idx gridDimension(alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[dimIndex]);

thread_ = blockDimension * blockIdxInGrid + threadIdxLocal;
thread_ = thread_ + elementIdxShift; // Add the shift
stride_ = gridDimension * blockDimension;
Expand All @@ -347,9 +345,8 @@ namespace cms {
}

// Convenience constructor: uses the grid extent in elements along dimension 0
// (alpaka::getWorkDiv<alpaka::Grid, alpaka::Elems>) as the extent and relies on the
// default elementIdxShift / dimIndex arguments of the main constructor.
//
// This is a delegating constructor. The previous body wrote
// `elements_with_stride(acc, gridDimension);` as a statement, which only creates and
// discards a temporary object; nothing in that body assigned the thread_/stride_/extent_
// values of *this.
ALPAKA_FN_ACC elements_with_stride(const T_Acc& acc)
: elements_with_stride(acc, Idx(alpaka::getWorkDiv<alpaka::Grid, alpaka::Elems>(acc)[0])) {}

class iterator {
Expand All @@ -359,15 +356,14 @@ namespace cms {
// Dereference: returns the iterator's current element index (index_) by value.
ALPAKA_FN_ACC constexpr T operator*() const { return index_; }

ALPAKA_FN_ACC constexpr iterator& operator++() {

#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
// increment the index
index_ += stride_;
if (index_ < extent_)
return *this;

#else // CPU Backend
// Iterate over all the elements for one thread
#else // CPU Backend \
// Iterate over all the elements for one thread
index_ += 1;
if (index_ < old_index_ + blockDim && index_ < extent_) {
return *this;
Expand All @@ -392,12 +388,7 @@ namespace cms {

private:
ALPAKA_FN_ACC constexpr iterator(T thread, T stride, T extent, T blockDim)
: thread_{thread},
stride_{stride},
extent_{extent},
index_{thread_},
old_index_{index_},
blockDim{blockDim} {}
: thread_{thread}, stride_{stride}, extent_{extent}, index_{thread_}, old_index_{index_}, blockDim{blockDim} {}

// Builds an iterator positioned at an explicit `index` (rather than at `thread`).
// thread_, stride_, extent_ and index_ are copied from the arguments; old_index_ is
// initialised from the index_ member, and the blockDim member from the blockDim parameter
// (same name: inside the braces the parameter is the one being read).
// NOTE(review): old_index_{index_} reads a member, so it relies on index_ being declared
// before old_index_ in the class — the member declarations are not visible here; confirm.
ALPAKA_FN_ACC constexpr iterator(T thread, T stride, T extent, T index, T blockDim)
: thread_{thread}, stride_{stride}, extent_{extent}, index_{index}, old_index_{index_}, blockDim{blockDim} {}
Expand Down Expand Up @@ -429,7 +420,6 @@ namespace cms {
class elements_with_stride_1d {
public:
ALPAKA_FN_ACC elements_with_stride_1d(const T_Acc& acc) {

const Vec3 threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc));
const Vec3 blockIdxInGrid(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc));

Expand Down Expand Up @@ -469,7 +459,6 @@ namespace cms {
// Dereference: returns a copy of the iterator's current Vec3 index (index_).
ALPAKA_FN_ACC Vec3 operator*() const { return index_; }

ALPAKA_FN_ACC constexpr iterator& operator++() {

#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
// increment the first coordinate
index_[0u] += stride_[0u];
Expand Down Expand Up @@ -514,12 +503,7 @@ namespace cms {

private:
ALPAKA_FN_ACC iterator(Vec3 thread, Vec3 stride, Vec3 extent, Vec3 blockDim)
: thread_{thread},
stride_{stride},
extent_{extent},
index_{thread_},
old_index_{index_},
blockDim{blockDim} {}
: thread_{thread}, stride_{stride}, extent_{extent}, index_{thread_}, old_index_{index_}, blockDim{blockDim} {}

// Builds an iterator positioned at an explicit `index` (rather than at `thread`).
// thread_, stride_, extent_ and index_ are copied from the arguments; old_index_ is
// initialised from the index_ member, and the blockDim member from the blockDim parameter.
// NOTE(review): old_index_{index_} relies on index_ being declared before old_index_ in
// the class — the member declarations are not visible here; confirm.
ALPAKA_FN_ACC iterator(Vec3 thread, Vec3 stride, Vec3 extent, Vec3 index, Vec3 blockDim)
: thread_{thread}, stride_{stride}, extent_{extent}, index_{index}, old_index_{index_}, blockDim{blockDim} {}
Expand Down Expand Up @@ -551,7 +535,6 @@ namespace cms {
class elements_with_stride_2d {
public:
ALPAKA_FN_ACC elements_with_stride_2d(const T_Acc& acc) {

const Vec3 threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc));
const Vec3 blockIdxInGrid(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc));

Expand Down Expand Up @@ -591,7 +574,6 @@ namespace cms {
// Dereference: returns a copy of the iterator's current Vec3 index (index_).
ALPAKA_FN_ACC Vec3 operator*() const { return index_; }

ALPAKA_FN_ACC constexpr iterator& operator++() {

#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
// increment the first coordinate
index_[0u] += stride_[0u];
Expand Down Expand Up @@ -645,12 +627,7 @@ namespace cms {

private:
ALPAKA_FN_ACC iterator(Vec3 thread, Vec3 stride, Vec3 extent, Vec3 blockDim)
: thread_{thread},
stride_{stride},
extent_{extent},
index_{thread_},
old_index_{index_},
blockDim{blockDim} {}
: thread_{thread}, stride_{stride}, extent_{extent}, index_{thread_}, old_index_{index_}, blockDim{blockDim} {}

// Builds an iterator positioned at an explicit `index` (rather than at `thread`).
// thread_, stride_, extent_ and index_ are copied from the arguments; old_index_ is
// initialised from the index_ member, and the blockDim member from the blockDim parameter.
// NOTE(review): old_index_{index_} relies on index_ being declared before old_index_ in
// the class — the member declarations are not visible here; confirm.
ALPAKA_FN_ACC iterator(Vec3 thread, Vec3 stride, Vec3 extent, Vec3 index, Vec3 blockDim)
: thread_{thread}, stride_{stride}, extent_{extent}, index_{index}, old_index_{index_}, blockDim{blockDim} {}
Expand Down Expand Up @@ -682,7 +659,6 @@ namespace cms {
class elements_with_stride_3d {
public:
ALPAKA_FN_ACC elements_with_stride_3d(const T_Acc& acc) {

const Vec3 threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc));
const Vec3 blockIdxInGrid(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc));

Expand All @@ -692,7 +668,8 @@ namespace cms {
thread_ = {blockDimension[0u] * blockIdxInGrid[0u] + threadIdxLocal[0u],
blockDimension[1u] * blockIdxInGrid[1u] + threadIdxLocal[1u],
blockDimension[2u] * blockIdxInGrid[2u] + threadIdxLocal[2u]};
stride_ = {blockDimension[0u] * gridDimension[0u], blockDimension[1u] * gridDimension[1u],
stride_ = {blockDimension[0u] * gridDimension[0u],
blockDimension[1u] * gridDimension[1u],
blockDimension[2u] * gridDimension[2u]};
extent_ = stride_;

Expand All @@ -711,7 +688,8 @@ namespace cms {
blockDimension[1u] * blockIdxInGrid[1u] + threadIdxLocal[1u],
blockDimension[2u] * blockIdxInGrid[2u] + threadIdxLocal[2u]};
thread_ = thread_ + elementIdxShift;
stride_ = {blockDimension[0u] * gridDimension[0u], blockDimension[1u] * gridDimension[1u],
stride_ = {blockDimension[0u] * gridDimension[0u],
blockDimension[1u] * gridDimension[1u],
blockDimension[2u] * gridDimension[2u]};

blockDim = blockDimension;
Expand All @@ -724,7 +702,6 @@ namespace cms {
// Dereference: returns a copy of the iterator's current Vec3 index (index_).
ALPAKA_FN_ACC Vec3 operator*() const { return index_; }

ALPAKA_FN_ACC constexpr iterator& operator++() {

#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
// increment the first coordinate
index_[0u] += stride_[0u];
Expand Down Expand Up @@ -785,12 +762,7 @@ namespace cms {

private:
ALPAKA_FN_ACC iterator(Vec3 thread, Vec3 stride, Vec3 extent, Vec3 blockDim)
: thread_{thread},
stride_{stride},
extent_{extent},
index_{thread_},
old_index_{index_},
blockDim{blockDim} {}
: thread_{thread}, stride_{stride}, extent_{extent}, index_{thread_}, old_index_{index_}, blockDim{blockDim} {}

// Builds an iterator positioned at an explicit `index` (rather than at `thread`).
// thread_, stride_, extent_ and index_ are copied from the arguments; old_index_ is
// initialised from the index_ member, and the blockDim member from the blockDim parameter.
// NOTE(review): old_index_{index_} relies on index_ being declared before old_index_ in
// the class — the member declarations are not visible here; confirm.
ALPAKA_FN_ACC iterator(Vec3 thread, Vec3 stride, Vec3 extent, Vec3 index, Vec3 blockDim)
: thread_{thread}, stride_{stride}, extent_{extent}, index_{index}, old_index_{index_}, blockDim{blockDim} {}
Expand Down
6 changes: 3 additions & 3 deletions src/alpaka/AlpakaCore/prefixScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ namespace cms {
template <typename T>
struct multiBlockPrefixScan {
template <typename T_Acc>
ALPAKA_FN_ACC void operator()(const T_Acc& acc, T const* ci, T* co, int32_t size, int32_t *pc) const {
ALPAKA_FN_ACC void operator()(const T_Acc& acc, T const* ci, T* co, int32_t size, int32_t* pc) const {
uint32_t const blockDimension(alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]);
uint32_t const threadDimension(alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]);
uint32_t const blockIdx(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
Expand All @@ -152,7 +152,7 @@ namespace cms {
auto& ws = alpaka::declareSharedVar<T[32], __COUNTER__>(acc);
if (size - off > 0)
blockPrefixScan(acc, ci + off, co + off, std::min(int(blockDimension * threadDimension), size - off), ws);

auto& isLastBlockDone = alpaka::declareSharedVar<bool, __COUNTER__>(acc);
if (0 == threadIdx) {
cms::alpakatools::threadfence(acc);
Expand All @@ -170,7 +170,7 @@ namespace cms {
auto& psum = alpaka::declareSharedVar<T[1024], __COUNTER__>(acc);

ALPAKA_ASSERT_OFFLOAD(static_cast<int32_t>(blockDimension * threadDimension) >= gridDimension);

for (int elemId = 0; elemId < static_cast<int>(threadDimension); ++elemId) {
int index = +threadIdx * threadDimension + elemId;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -246,58 +246,59 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
(*apc2) = 0;
} // ready for next kernel

for (uint32_t idx : cms::alpakatools::elements_with_stride<uint32_t, T_Acc>(acc, *nCells, elementShift, dimIndexY)) {
auto cellIndex = idx;
auto &thisCell = cells[idx];
//if (thisCell.theDoubletId < 0 || thisCell.theUsed>1)
// continue;
auto innerHitId = thisCell.get_inner_hit_id();
int numberOfPossibleNeighbors = isOuterHitOfCell[innerHitId].size();
const auto *__restrict__ vi = isOuterHitOfCell[innerHitId].data();

constexpr uint32_t last_bpix1_detIndex = 96;
constexpr uint32_t last_barrel_detIndex = 1184;
auto ri = thisCell.get_inner_r(hh);
auto zi = thisCell.get_inner_z(hh);

auto ro = thisCell.get_outer_r(hh);
auto zo = thisCell.get_outer_z(hh);
auto isBarrel = thisCell.get_inner_detIndex(hh) < last_barrel_detIndex;

cms::alpakatools::for_each_element_in_block_strided(
acc,
numberOfPossibleNeighbors,
0u,
[&](uint32_t j) {
auto otherCell = vi[j]; // NB: Was with __ldg in legacy
auto &oc = cells[otherCell];
// if (cells[otherCell].theDoubletId < 0 ||
// cells[otherCell].theUsed>1 )
// continue;
auto r1 = oc.get_inner_r(hh);
auto z1 = oc.get_inner_z(hh);
// auto isBarrel = oc.get_outer_detIndex(hh) < last_barrel_detIndex;
bool aligned = GPUCACell::areAlignedRZ(
r1,
z1,
ri,
zi,
ro,
zo,
ptmin,
isBarrel ? CAThetaCutBarrel : CAThetaCutForward); // 2.f*thetaCut); // FIXME tune cuts
if (aligned && thisCell.dcaCut(hh,
oc,
oc.get_inner_detIndex(hh) < last_bpix1_detIndex ? dcaCutInnerTriplet
: dcaCutOuterTriplet,
hardCurvCut)) { // FIXME tune cuts
oc.addOuterNeighbor(acc, cellIndex, *cellNeighbors);
thisCell.theUsed |= 1;
oc.theUsed |= 1;
}
},
dimIndexX); // loop on inner cells
} // loop on outer cells
for (uint32_t idx :
cms::alpakatools::elements_with_stride<uint32_t, T_Acc>(acc, *nCells, elementShift, dimIndexY)) {
auto cellIndex = idx;
auto &thisCell = cells[idx];
//if (thisCell.theDoubletId < 0 || thisCell.theUsed>1)
// continue;
auto innerHitId = thisCell.get_inner_hit_id();
int numberOfPossibleNeighbors = isOuterHitOfCell[innerHitId].size();
const auto *__restrict__ vi = isOuterHitOfCell[innerHitId].data();

constexpr uint32_t last_bpix1_detIndex = 96;
constexpr uint32_t last_barrel_detIndex = 1184;
auto ri = thisCell.get_inner_r(hh);
auto zi = thisCell.get_inner_z(hh);

auto ro = thisCell.get_outer_r(hh);
auto zo = thisCell.get_outer_z(hh);
auto isBarrel = thisCell.get_inner_detIndex(hh) < last_barrel_detIndex;

cms::alpakatools::for_each_element_in_block_strided(
acc,
numberOfPossibleNeighbors,
0u,
[&](uint32_t j) {
auto otherCell = vi[j]; // NB: Was with __ldg in legacy
auto &oc = cells[otherCell];
// if (cells[otherCell].theDoubletId < 0 ||
// cells[otherCell].theUsed>1 )
// continue;
auto r1 = oc.get_inner_r(hh);
auto z1 = oc.get_inner_z(hh);
// auto isBarrel = oc.get_outer_detIndex(hh) < last_barrel_detIndex;
bool aligned = GPUCACell::areAlignedRZ(
r1,
z1,
ri,
zi,
ro,
zo,
ptmin,
isBarrel ? CAThetaCutBarrel : CAThetaCutForward); // 2.f*thetaCut); // FIXME tune cuts
if (aligned && thisCell.dcaCut(hh,
oc,
oc.get_inner_detIndex(hh) < last_bpix1_detIndex ? dcaCutInnerTriplet
: dcaCutOuterTriplet,
hardCurvCut)) { // FIXME tune cuts
oc.addOuterNeighbor(acc, cellIndex, *cellNeighbors);
thisCell.theUsed |= 1;
oc.theUsed |= 1;
}
},
dimIndexX); // loop on inner cells
} // loop on outer cells
}
};

Expand Down Expand Up @@ -358,7 +359,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
HitContainer const *__restrict__ foundNtuplets,
Quality const *__restrict__ quality,
CAConstants::TupleMultiplicity *tupleMultiplicity) const {

for (uint32_t it : cms::alpakatools::elements_with_stride<uint32_t, T_Acc>(acc, foundNtuplets->nbins())) {
auto nhits = foundNtuplets->size(it);
if (nhits >= 3 && quality[it] != trackQuality::dup) {
Expand All @@ -378,7 +378,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
HitContainer const *__restrict__ foundNtuplets,
Quality const *__restrict__ quality,
CAConstants::TupleMultiplicity *tupleMultiplicity) const {

for (uint32_t it : cms::alpakatools::elements_with_stride<uint32_t, T_Acc>(acc, foundNtuplets->nbins())) {
auto nhits = foundNtuplets->size(it);
if (nhits >= 3 && quality[it] != trackQuality::dup) {
Expand All @@ -399,7 +398,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
TkSoA const *__restrict__ tracks,
CAHitNtupletGeneratorKernels::QualityCuts cuts,
Quality *__restrict__ quality) const {

for (uint32_t it : cms::alpakatools::elements_with_stride<uint32_t, T_Acc>(acc, tuples->nbins())) {
auto nhits = tuples->size(it);
if (nhits == 0)
Expand Down Expand Up @@ -466,7 +464,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
HitContainer const *__restrict__ tuples,
Quality const *__restrict__ quality,
CAHitNtupletGeneratorKernels::Counters *counters) const {

for (uint32_t idx : cms::alpakatools::elements_with_stride<uint32_t, T_Acc>(acc, tuples->nbins())) {
if (tuples->size(idx) == 0)
return; //guard
Expand All @@ -483,7 +480,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
HitContainer const *__restrict__ tuples,
Quality const *__restrict__ quality,
CAHitNtupletGeneratorKernels::HitToTuple *hitToTuple) const {

for (uint32_t idx : cms::alpakatools::elements_with_stride<uint32_t, T_Acc>(acc, tuples->nbins())) {
if (tuples->size(idx) == 0)
return; // guard
Expand All @@ -501,7 +497,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
HitContainer const *__restrict__ tuples,
Quality const *__restrict__ quality,
CAHitNtupletGeneratorKernels::HitToTuple *hitToTuple) const {

for (uint32_t idx : cms::alpakatools::elements_with_stride<uint32_t, T_Acc>(acc, tuples->nbins())) {
if (tuples->size(idx) == 0)
return; // guard
Expand Down Expand Up @@ -608,7 +603,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {

} // maxNh
} // hitToTuple.size
} // loop over hits
} // loop over hits
}
};

Expand Down
Loading

0 comments on commit 1667920

Please sign in to comment.