Skip to content

Commit 1ac8c2c

Browse files
committed
Fixes for indexing and offsets
1 parent ed459ea commit 1ac8c2c

File tree

4 files changed

+38
-27
lines changed

4 files changed

+38
-27
lines changed

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,8 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
706706
nnApplications[lane].initClusterizer(nn_settings, clustererNNShadow);
707707
}
708708
AllocateRegisteredMemory(clustererNN.mMemoryId);
709+
nnApplications[lane].createBoundary(clustererNNShadow);
710+
nnApplications[lane].createIndexLookup(clustererNNShadow);
709711
});
710712
if (doGPU) {
711713
WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->tpcNNClusterer - (char*)processors(), &processorsShadow()->tpcNNClusterer, sizeof(GPUTPCNNClusterizer) * NSECTORS, mRec->NStreams() - 1, &mEvents->init);

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,9 @@ class GPUTPCNNClusterizer : public GPUProcessor
6161

6262
// Boundary lookup table
6363
int32_t mBoundaryMapSizeRow = 0;
64-
int32_t mBoundaryMapSizePerRow = 0;
64+
int32_t mBoundaryMapSizePadsPerRow = 0;
6565
int32_t mBoundaryMapSize = 0;
66+
int32_t mBoundaryPadding = 11; // Padding on each side of the boundary map to account for pad_offset
6667
int8_t* mIsBoundary = nullptr;
6768

6869
// Index lookup table

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,11 @@ void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclust
9191
clustererNN.mNnClusterizerSizeInputTime = settings.nnClusterizerSizeInputTime;
9292
clustererNN.mNnClusterizerChargeArraySize = ((2 * settings.nnClusterizerSizeInputRow + 1) * (2 * settings.nnClusterizerSizeInputPad + 1) * (2 * settings.nnClusterizerSizeInputTime + 1));
9393
clustererNN.mNnClusterizerElementSize = clustererNN.mNnClusterizerChargeArraySize + (settings.nnClusterizerAddIndexData ? 3 : 0);
94-
clustererNN.mBoundaryMapSize = (3*clustererNN.mNnClusterizerSizeInputRow + o2::tpc::constants::MAXGLOBALPADROW)*(GPUTPCGeometry::NPads(o2::tpc::constants::MAXGLOBALPADROW) + 2*clustererNN.mNnClusterizerSizeInputPad);
95-
clustererNN.mIndexLookupSize = 3*clustererNN.mNnClusterizerElementSize; // local row, pad, time coordinate from flat index
94+
clustererNN.mBoundaryMapSizeRow = 3 * clustererNN.mNnClusterizerSizeInputRow + o2::tpc::constants::MAXGLOBALPADROW;
95+
clustererNN.mBoundaryPadding = 11; // padding on each side to account for pad_offset. N=11 since then mIsBoundary = 24320 ~< (1.5 x 2^14 = 24576) && N must be bigger than (NPads[row(end_iroc + 1)] - NPads[row(end_iroc)])/2 (=6) for pad_offset to work
96+
clustererNN.mBoundaryMapSizePadsPerRow = GPUTPCGeometry::NPads(o2::tpc::constants::MAXGLOBALPADROW) + 2*clustererNN.mBoundaryPadding;
97+
clustererNN.mBoundaryMapSize = clustererNN.mBoundaryMapSizeRow*clustererNN.mBoundaryMapSizePadsPerRow;
98+
clustererNN.mIndexLookupSize = 3*clustererNN.mNnClusterizerChargeArraySize; // local row, pad, time shift from flat index
9699
clustererNN.mNnClusterizerAddIndexData = settings.nnClusterizerAddIndexData;
97100
clustererNN.mNnClusterizerBatchedMode = settings.nnClusterizerBatchedMode;
98101
clustererNN.mNnClusterizerBoundaryFillValue = settings.nnClusterizerBoundaryFillValue;
@@ -119,27 +122,22 @@ void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclust
119122
clustererNN.mNnClusterizerModelReg2NumOutputNodes = mModelReg2.getNumOutputNodes()[0][1];
120123
}
121124
}
122-
createBoundary(clustererNN);
123-
createIndexLookup(clustererNN);
124125
}
125126

126127
void GPUTPCNNClusterizerHost::createBoundary(GPUTPCNNClusterizer& clustererNN) {
127128
// Call after init of the clustererNN elements
128-
clustererNN.mBoundaryMapSizeRow = 3 * clustererNN.mNnClusterizerSizeInputRow + o2::tpc::constants::MAXGLOBALPADROW;
129-
clustererNN.mBoundaryMapSizePerRow = GPUTPCGeometry::NPads(o2::tpc::constants::MAXGLOBALPADROW) + 2 * clustererNN.mNnClusterizerSizeInputPad;
130129
for(int r = 0; r < clustererNN.mBoundaryMapSizeRow; r++) {
131-
for (int p = 0; p < clustererNN.mBoundaryMapSizePerRow; p++) {
132-
int32_t i = r * clustererNN.mBoundaryMapSizePerRow + p;
130+
int8_t skipCheckInRow = 0;
131+
for (int p = 0; p < clustererNN.mBoundaryMapSizePadsPerRow; p++) {
132+
int32_t i = r * clustererNN.mBoundaryMapSizePadsPerRow + p;
133133
clustererNN.mIsBoundary[i] = 1;
134-
if (p >= clustererNN.mNnClusterizerSizeInputPad || r >= clustererNN.mNnClusterizerSizeInputRow) {
134+
if (!skipCheckInRow && (p >= clustererNN.mBoundaryPadding || r >= clustererNN.mNnClusterizerSizeInputRow)) {
135135
if (r < (GPUTPCGeometry::EndIROC() + clustererNN.mNnClusterizerSizeInputRow)) {
136-
clustererNN.mIsBoundary[i] = (int32_t)((p - clustererNN.mNnClusterizerSizeInputPad) >= static_cast<int>(GPUTPCGeometry::NPads(r - clustererNN.mNnClusterizerSizeInputRow)));
136+
clustererNN.mIsBoundary[i] = (int32_t)((p - clustererNN.mBoundaryPadding) >= static_cast<int>(GPUTPCGeometry::NPads(r - clustererNN.mNnClusterizerSizeInputRow)));
137137
} else if (r >= (GPUTPCGeometry::EndIROC() + 2*clustererNN.mNnClusterizerSizeInputRow) && r < (o2::tpc::constants::MAXGLOBALPADROW + 2*clustererNN.mNnClusterizerSizeInputRow)) {
138-
clustererNN.mIsBoundary[i] = (int32_t)((p - clustererNN.mNnClusterizerSizeInputPad) >= static_cast<int>(GPUTPCGeometry::NPads(r - 2*clustererNN.mNnClusterizerSizeInputRow)));
139-
}
140-
if (clustererNN.mIsBoundary[i] == 1) {
141-
break; // No need to check further pads in this row
138+
clustererNN.mIsBoundary[i] = (int32_t)((p - clustererNN.mBoundaryPadding) >= static_cast<int>(GPUTPCGeometry::NPads(r - 2*clustererNN.mNnClusterizerSizeInputRow)));
142139
}
140+
skipCheckInRow = (clustererNN.mIsBoundary[i] == 1); // No need to check further pads in this row
143141
}
144142
}
145143
}

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -121,9 +121,10 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
121121
template <>
122122
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNNGPU>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)
123123
{
124-
uint32_t glo_idx = get_global_id(0);
125124
auto& clusterer = processors.tpcClusterer[sector];
126125
auto& clustererNN = processors.tpcNNClusterer[sector];
126+
127+
uint32_t glo_idx = get_global_id(0);
127128
uint32_t base_idx = CAMath::Floor(glo_idx / clustererNN.mNnClusterizerElementSize);
128129
uint32_t transient_index = glo_idx - (base_idx * clustererNN.mNnClusterizerElementSize);
129130

@@ -153,17 +154,22 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
153154
clustererNN.mInputData_32[top_idx - 2] = row / 152.f;
154155
clustererNN.mInputData_32[top_idx - 1] = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
155156
}
156-
} else if ((int32_t)transient_index < (clustererNN.mNnClusterizerElementSize - 3)) {
157+
} else if ((int32_t)transient_index < clustererNN.mNnClusterizerChargeArraySize) {
157158
int32_t time = static_cast<int>(peak.time());
158159
int32_t idxLookup = 3*transient_index;
159160
int32_t r = clustererNN.mIndexLookup[idxLookup] + row, p = clustererNN.mIndexLookup[idxLookup + 1] + pad, t = clustererNN.mIndexLookup[idxLookup + 2] + time;
160161
int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
161-
int32_t isBoundaryIndex = (r + row_offset + clustererNN.mNnClusterizerSizeInputRow) * clustererNN.mBoundaryMapSizePerRow + p + clustererNN.mNnClusterizerSizeInputPad;
162+
int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, r);
163+
p += pad_offset;
164+
int32_t isBoundaryIndex = (r + row_offset + clustererNN.mNnClusterizerSizeInputRow) * clustererNN.mBoundaryMapSizePadsPerRow + p + clustererNN.mBoundaryPadding;
162165

163166
if (!clustererNN.mIsBoundary[isBoundaryIndex] && (t >= 0) && (t < TPC_MAX_FRAGMENT_LEN_GPU)) {
164-
int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, r);
165167
float central_charge = static_cast<float>(chargeMap[peak].unpack());
166-
CfChargePos tmp_pos(r, p + pad_offset, t);
168+
CfChargePos tmp_pos(r, p, t);
169+
// if ((glo_idx % (clustererNN.mNnClusterizerElementSize*1000)) == (int)((clustererNN.mNnClusterizerChargeArraySize-1)/2.f)){
170+
// printf("glo_idx: %d, r: %d, p: %d, t: %d, tmp_pos: (%d, %d, %d), charge: %f, central_charge: %f\n",
171+
// glo_idx, clustererNN.mIndexLookup[idxLookup], clustererNN.mIndexLookup[idxLookup + 1], clustererNN.mIndexLookup[idxLookup + 2], tmp_pos.row(), tmp_pos.pad(), tmp_pos.time(), chargeMap[tmp_pos].unpack(), central_charge);
172+
// }
167173
if (dtype == 0) {
168174
clustererNN.mInputData_16[glo_idx] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
169175
} else if (dtype == 1) {
@@ -489,24 +495,28 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
489495
// THe following arithmetic is done because the network is trained with a split between IROC and OROC boundary
490496
GPUd() int32_t GPUTPCNNClusterizerKernels::padOffset(int32_t row_ref, int32_t row_current)
491497
{
492-
return (int)((GPUTPCGeometry::NPads(row_current) - GPUTPCGeometry::NPads(row_ref)) / 2);
498+
if(row_current < 0 || row_current > o2::tpc::constants::MAXGLOBALPADROW) {
499+
return 0; // Short-circuit for negative rows
500+
} else {
501+
return (int)((GPUTPCGeometry::NPads(row_current) - GPUTPCGeometry::NPads(row_ref)) / 2);
502+
}
493503
}
494504

495-
GPUd() int32_t GPUTPCNNClusterizerKernels::rowOffset(int32_t row, int32_t global_shift)
505+
GPUd() int32_t GPUTPCNNClusterizerKernels::rowOffset(int32_t row, int32_t offset)
496506
{
497-
return (row > 62 ? global_shift : 0);
507+
return (row > 62 ? offset : 0);
498508
}
499509

500-
GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int32_t row, int32_t pad, int32_t global_shift)
510+
GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int32_t row, int32_t pad, int32_t offset)
501511
{
502512
if (pad < 0 || row < 0) { // Faster short-circuit
503513
return true;
504514
} else if (row < 63) {
505515
return (pad >= static_cast<int>(GPUTPCGeometry::NPads(row)));
506-
} else if (row < (63 + global_shift)) { // to account for the gap between IROC and OROC. Charge will be set to -1 in order to signal boundary to the neural network
516+
} else if (row < (63 + offset)) { // to account for the gap between IROC and OROC. Charge will be set to the boundary fill value in order to signal boundaries to the neural network
507517
return true;
508-
} else if (row < (o2::tpc::constants::MAXGLOBALPADROW + global_shift)) {
509-
return (pad >= static_cast<int>(GPUTPCGeometry::NPads(row - global_shift)));
518+
} else if (row < (o2::tpc::constants::MAXGLOBALPADROW + offset)) {
519+
return (pad >= static_cast<int>(GPUTPCGeometry::NPads(row - offset)));
510520
} else {
511521
return true;
512522
}

0 commit comments

Comments
 (0)