@@ -121,9 +121,10 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
 template <>
 GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNNGPU>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)
 {
-  uint32_t glo_idx = get_global_id(0);
   auto& clusterer = processors.tpcClusterer[sector];
   auto& clustererNN = processors.tpcNNClusterer[sector];
+
+  uint32_t glo_idx = get_global_id(0);
   uint32_t base_idx = CAMath::Floor(glo_idx / clustererNN.mNnClusterizerElementSize);
   uint32_t transient_index = glo_idx - (base_idx * clustererNN.mNnClusterizerElementSize);
 
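For reference, the relocated `glo_idx` line feeds the decomposition directly below it: each cluster candidate owns `mNnClusterizerElementSize` consecutive thread ids, which split into a candidate index (`base_idx`) and a position inside that candidate's input element (`transient_index`). A minimal host-side sketch of the same arithmetic, with the element size as an assumed placeholder value:

```cpp
#include <cstdint>
#include <cstdio>

// Assumed placeholder for clustererNN.mNnClusterizerElementSize.
constexpr uint32_t kElementSize = 179;

int main()
{
  const uint32_t samples[] = {0, 178, 179, 400};
  for (uint32_t glo_idx : samples) {
    uint32_t base_idx = glo_idx / kElementSize;                   // which cluster candidate this thread works on
    uint32_t transient_index = glo_idx - base_idx * kElementSize; // position inside that candidate's input element
    std::printf("glo_idx=%u -> base_idx=%u, transient_index=%u\n", glo_idx, base_idx, transient_index);
  }
  return 0;
}
```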
@@ -153,17 +154,22 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
       clustererNN.mInputData_32[top_idx - 2] = row / 152.f;
       clustererNN.mInputData_32[top_idx - 1] = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
     }
-  } else if ((int32_t)transient_index < (clustererNN.mNnClusterizerElementSize - 3)) {
+  } else if ((int32_t)transient_index < clustererNN.mNnClusterizerChargeArraySize) {
     int32_t time = static_cast<int>(peak.time());
     int32_t idxLookup = 3 * transient_index;
     int32_t r = clustererNN.mIndexLookup[idxLookup] + row, p = clustererNN.mIndexLookup[idxLookup + 1] + pad, t = clustererNN.mIndexLookup[idxLookup + 2] + time;
     int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
-    int32_t isBoundaryIndex = (r + row_offset + clustererNN.mNnClusterizerSizeInputRow) * clustererNN.mBoundaryMapSizePerRow + p + clustererNN.mNnClusterizerSizeInputPad;
+    int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, r);
+    p += pad_offset;
+    int32_t isBoundaryIndex = (r + row_offset + clustererNN.mNnClusterizerSizeInputRow) * clustererNN.mBoundaryMapSizePadsPerRow + p + clustererNN.mBoundaryPadding;
 
     if (!clustererNN.mIsBoundary[isBoundaryIndex] && (t >= 0) && (t < TPC_MAX_FRAGMENT_LEN_GPU)) {
-      int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, r);
       float central_charge = static_cast<float>(chargeMap[peak].unpack());
-      CfChargePos tmp_pos(r, p + pad_offset, t);
+      CfChargePos tmp_pos(r, p, t);
+      // if ((glo_idx % (clustererNN.mNnClusterizerElementSize * 1000)) == (int)((clustererNN.mNnClusterizerChargeArraySize - 1) / 2.f)) {
+      //   printf("glo_idx: %d, r: %d, p: %d, t: %d, tmp_pos: (%d, %d, %d), charge: %f, central_charge: %f\n",
+      //          glo_idx, clustererNN.mIndexLookup[idxLookup], clustererNN.mIndexLookup[idxLookup + 1], clustererNN.mIndexLookup[idxLookup + 2], tmp_pos.row(), tmp_pos.pad(), tmp_pos.time(), chargeMap[tmp_pos].unpack(), central_charge);
+      // }
       if (dtype == 0) {
         clustererNN.mInputData_16[glo_idx] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
       } else if (dtype == 1) {
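The rewritten lookup applies `padOffset` to `p` before flattening (row, pad) into a single index into `mIsBoundary`, so the same shifted `p` is later reused for `tmp_pos`. A rough sketch of that row-major flattening; all sizes here are assumed placeholders for `mNnClusterizerSizeInputRow`, `mBoundaryMapSizePadsPerRow` and `mBoundaryPadding`, not the clusterer's real configuration:

```cpp
#include <cstdint>
#include <vector>

// Assumed placeholder sizes (the real values come from the clusterer configuration).
constexpr int32_t kRowPadding = 3;                  // stands in for mNnClusterizerSizeInputRow
constexpr int32_t kPadsPerRow = 150;                // stands in for mBoundaryMapSizePadsPerRow
constexpr int32_t kPadPadding = 2;                  // stands in for mBoundaryPadding
constexpr int32_t kNumRows = 152 + 3 * kRowPadding; // pad rows plus padding and the IROC/OROC shift

// Row-major flattening analogous to isBoundaryIndex: the row is shifted by the row padding
// (plus any IROC/OROC offset folded into row_offset), the pad by the pad padding, so that
// window positions with negative coordinates still land inside the map.
inline int32_t boundaryIndex(int32_t r, int32_t row_offset, int32_t p)
{
  return (r + row_offset + kRowPadding) * kPadsPerRow + (p + kPadPadding);
}

int main()
{
  std::vector<uint8_t> isBoundary(kNumRows * kPadsPerRow, 0);
  // Window position one row below and one pad left of a peak at (row = 70, pad = 40),
  // i.e. an OROC row, so row_offset equals the row padding:
  const int32_t idx = boundaryIndex(69, kRowPadding, 39);
  return isBoundary[idx] ? 1 : 0;
}
```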
@@ -489,24 +495,28 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
 // The following arithmetic is done because the network is trained with a split at the IROC/OROC boundary
 GPUd() int32_t GPUTPCNNClusterizerKernels::padOffset(int32_t row_ref, int32_t row_current)
 {
-  return (int)((GPUTPCGeometry::NPads(row_current) - GPUTPCGeometry::NPads(row_ref)) / 2);
+  if (row_current < 0 || row_current > o2::tpc::constants::MAXGLOBALPADROW) {
+    return 0; // Short-circuit for out-of-range rows
+  } else {
+    return (int)((GPUTPCGeometry::NPads(row_current) - GPUTPCGeometry::NPads(row_ref)) / 2);
+  }
 }
 
-GPUd() int32_t GPUTPCNNClusterizerKernels::rowOffset(int32_t row, int32_t global_shift)
+GPUd() int32_t GPUTPCNNClusterizerKernels::rowOffset(int32_t row, int32_t offset)
 {
-  return (row > 62 ? global_shift : 0);
+  return (row > 62 ? offset : 0);
 }
 
-GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int32_t row, int32_t pad, int32_t global_shift)
+GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int32_t row, int32_t pad, int32_t offset)
 {
   if (pad < 0 || row < 0) { // Faster short-circuit
     return true;
   } else if (row < 63) {
     return (pad >= static_cast<int>(GPUTPCGeometry::NPads(row)));
-  } else if (row < (63 + global_shift)) { // to account for the gap between IROC and OROC. Charge will be set to -1 in order to signal boundary to the neural network
+  } else if (row < (63 + offset)) { // to account for the gap between IROC and OROC. Charge will be set to the boundary fill value in order to signal boundaries to the neural network
     return true;
-  } else if (row < (o2::tpc::constants::MAXGLOBALPADROW + global_shift)) {
-    return (pad >= static_cast<int>(GPUTPCGeometry::NPads(row - global_shift)));
+  } else if (row < (o2::tpc::constants::MAXGLOBALPADROW + offset)) {
+    return (pad >= static_cast<int>(GPUTPCGeometry::NPads(row - offset)));
   } else {
     return true;
   }
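For reviewers unfamiliar with the helpers: `rowOffset` shifts OROC rows past the artificial gap, `padOffset` re-centres the pad coordinate when the reference and current rows have different pad counts, and `isBoundary` flags positions outside the active area (including the gap rows). The stand-alone sketch below mirrors the new function bodies, but uses a hypothetical `nPads(row)` instead of `GPUTPCGeometry::NPads`, so its numbers are illustrative only:

```cpp
#include <cstdint>
#include <cstdio>

constexpr int32_t kMaxGlobalPadRow = 152; // o2::tpc::constants::MAXGLOBALPADROW

// Hypothetical pad-count model; the real geometry is provided by GPUTPCGeometry::NPads.
inline int32_t nPads(int32_t row) { return row < 63 ? 66 + (row / 16) * 4 : 76 + ((row - 63) / 16) * 6; }

inline int32_t rowOffset(int32_t row, int32_t offset) { return row > 62 ? offset : 0; }

inline int32_t padOffset(int32_t row_ref, int32_t row_current)
{
  if (row_current < 0 || row_current > kMaxGlobalPadRow) {
    return 0; // out-of-range rows contribute no shift
  }
  return (nPads(row_current) - nPads(row_ref)) / 2;
}

inline bool isBoundary(int32_t row, int32_t pad, int32_t offset)
{
  if (pad < 0 || row < 0) {
    return true;
  } else if (row < 63) {
    return pad >= nPads(row);
  } else if (row < 63 + offset) {
    return true; // the artificial gap inserted between IROC and OROC
  } else if (row < kMaxGlobalPadRow + offset) {
    return pad >= nPads(row - offset);
  }
  return true;
}

int main()
{
  const int32_t offset = 3;   // assumed gap width between IROC and OROC
  const int32_t peakRow = 62; // last IROC row
  // Walk a +-3 row window around the peak in the shifted row space used by isBoundary():
  for (int32_t dr = -3; dr <= 3; ++dr) {
    int32_t r = peakRow + rowOffset(peakRow, offset) + dr; // peak sits in the IROC, so no shift
    std::printf("shifted row %d, pad 10 -> %s\n", r, isBoundary(r, 10, offset) ? "boundary" : "active");
  }
  // padOffset between rows with different pad counts, and the new out-of-range guard:
  std::printf("padOffset(62, 70) = %d, padOffset(62, -1) = %d\n", padOffset(62, 70), padOffset(62, -1));
  return 0;
}
```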