diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx index 6dd3887c82417..520d2273e2185 100644 --- a/Common/ML/src/OrtInterface.cxx +++ b/Common/ML/src/OrtInterface.cxx @@ -143,7 +143,7 @@ void OrtModel::memoryOnDevice(int32_t deviceIndex) if (deviceIndex >= 0) { (pImplOrt->runOptions).AddConfigEntry("disable_synchronize_execution_providers", "1"); (pImplOrt->sessionOptions).AddConfigEntry("session.use_device_allocator_for_initializers", "1"); // See kOrtSessionOptionsUseDeviceAllocatorForInitializers, https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h - (pImplOrt->sessionOptions).AddConfigEntry("session.use_env_allocators", "1"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time + (pImplOrt->sessionOptions).AddConfigEntry("session.use_env_allocators", "1"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time // Arena memory shrinkage comes at performance cost /// For now prefer to use single allocation, enabled by O2/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu -> SetONNXGPUStream -> rocm_options.arena_extend_strategy = 0; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 247438fa8a13f..382e93f06aea8 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -699,7 +699,7 @@ void GPUReconstructionHIP::SetONNXGPUStream(Ort::SessionOptions& session_options // api.GetCurrentGpuDeviceId(deviceId); OrtROCMProviderOptions rocm_options; rocm_options.has_user_compute_stream = 1; // Indicate that we are passing a user stream - rocm_options.arena_extend_strategy = 0; // kNextPowerOfTwo = 0, kSameAsRequested = 1 -> https://github.com/search?q=repo%3Amicrosoft%2Fonnxruntime%20kSameAsRequested&type=code + rocm_options.arena_extend_strategy = 0; // kNextPowerOfTwo = 0, kSameAsRequested = 1 -> https://github.com/search?q=repo%3Amicrosoft%2Fonnxruntime%20kSameAsRequested&type=code rocm_options.user_compute_stream = mInternals->Streams[stream]; session_options.AppendExecutionProvider_ROCM(rocm_options); #endif // ORT_ROCM_BUILD diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx index bda4c70d79c9d..ceda3acd7db46 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx @@ -125,7 +125,7 @@ struct MockedOrtAllocator : OrtAllocator { void LeakCheck(); -private: + private: MockedOrtAllocator(const MockedOrtAllocator&) = delete; MockedOrtAllocator& operator=(const MockedOrtAllocator&) = delete; @@ -136,7 +136,8 @@ struct MockedOrtAllocator : OrtAllocator { GPUReconstruction* rec; }; -MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info) { +MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info) +{ OrtAllocator::version = ORT_API_VERSION; OrtAllocator::Alloc = [](OrtAllocator* this_, size_t size) { return static_cast(this_)->Alloc(size); }; OrtAllocator::Free = [](OrtAllocator* this_, void* p) { static_cast(this_)->Free(p); }; @@ -146,42 +147,50 @@ MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info memory_info = info; } -MockedOrtAllocator::~MockedOrtAllocator() { +MockedOrtAllocator::~MockedOrtAllocator() +{ // Ort::GetApi().ReleaseMemoryInfo(memory_info); } -void* MockedOrtAllocator::Alloc(size_t size) { +void* MockedOrtAllocator::Alloc(size_t size) +{ return rec->AllocateVolatileDeviceMemory(size); } -void* MockedOrtAllocator::Reserve(size_t size) { +void* MockedOrtAllocator::Reserve(size_t size) +{ return rec->AllocateVolatileDeviceMemory(size); } -void MockedOrtAllocator::Free(void* p) { +void MockedOrtAllocator::Free(void* p) +{ rec->ReturnVolatileDeviceMemory(); } -const OrtMemoryInfo* MockedOrtAllocator::Info() const { +const OrtMemoryInfo* MockedOrtAllocator::Info() const +{ return memory_info; } -size_t MockedOrtAllocator::NumAllocations() const { +size_t MockedOrtAllocator::NumAllocations() const +{ return num_allocations.load(); } -size_t MockedOrtAllocator::NumReserveAllocations() const { +size_t MockedOrtAllocator::NumReserveAllocations() const +{ return num_reserve_allocations.load(); } -void MockedOrtAllocator::LeakCheck() { +void MockedOrtAllocator::LeakCheck() +{ if (memory_inuse.load()) LOG(warning) << "memory leak!!!"; } void GPUTPCNNClusterizerHost::volatileOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, int32_t chooseMockedAlloc) { - if(chooseMockedAlloc == 0) { + if (chooseMockedAlloc == 0) { mockedAlloc_class = std::make_shared(rec, (OrtMemoryInfo*)memInfo); Ort::GetApi().RegisterAllocator((OrtEnv*)(*env), mockedAlloc_class.get()); LOG(info) << "(ORT) Mocked ORT allocator for classification network registered";