From 0d630b0f0cf77ee9da81b677a5192c46210f7b86 Mon Sep 17 00:00:00 2001 From: Mukund Raghav Sharma Date: Mon, 9 Dec 2024 23:24:11 -0800 Subject: [PATCH 1/8] Initial commit with OOM diagnostics --- src/coreclr/gc/gc.cpp | 14 +++++++++++++- src/coreclr/gc/gcevents.h | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 3b717a6e239157..5263a5aebdc63c 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -3293,8 +3293,8 @@ void gc_heap::fire_pevents() // because EE is not suspended then. On entry it's fired after the GCStart event, on exit it's fire before the GCStop event. void gc_heap::fire_committed_usage_event() { -#ifdef FEATURE_EVENT_TRACE if (!EVENT_ENABLED (GCMarkWithType)) return; +#ifdef FEATURE_EVENT_TRACE size_t total_committed = 0; size_t committed_decommit = 0; @@ -17141,6 +17141,18 @@ void gc_heap::handle_oom (oom_reason reason, size_t alloc_size, add_to_oom_history_per_heap(); fgm_result.fgm = fgm_no_failure; +#ifdef FEATURE_EVENT_TRACE + GCEventFireOOMDetails_V1 ( + (size_t)oom_info.gc_index, + (uint8_t)*oom_info.allocated, + (uint8_t)*oom_info.reserved, + (size_t)oom_info.alloc_size, + (uint8_t)oom_info.reason, + (uint8_t)oom_info.fgm, + (size_t)oom_info.size, + (size_t)oom_info.available_pagefile_mb); +#endif //FEATURE_EVENT_TRACE + // Break early - before the more_space_lock is release so no other threads // could have allocated on the same heap when OOM happened. if (GCConfig::GetBreakOnOOM()) diff --git a/src/coreclr/gc/gcevents.h b/src/coreclr/gc/gcevents.h index d09eaa3c692dad..bad68c3506edf2 100644 --- a/src/coreclr/gc/gcevents.h +++ b/src/coreclr/gc/gcevents.h @@ -52,6 +52,7 @@ DYNAMIC_EVENT(CommittedUsage, GCEventLevel_Information, GCEventKeyword_GC, 1) DYNAMIC_EVENT(SizeAdaptationTuning, GCEventLevel_Information, GCEventKeyword_GC, 1) DYNAMIC_EVENT(SizeAdaptationFullGCTuning, GCEventLevel_Information, GCEventKeyword_GC, 1) DYNAMIC_EVENT(SizeAdaptationSample, GCEventLevel_Information, GCEventKeyword_GC, 1) +DYNAMIC_EVENT(OOMDetails, GCEventLevel_Information, GCEventKeyword_GC, 1) #undef KNOWN_EVENT #undef DYNAMIC_EVENT From ae03c854517da9c157a6bf1732ef25e6e9d83271 Mon Sep 17 00:00:00 2001 From: Mukund Raghav Sharma Date: Tue, 10 Dec 2024 13:41:08 -0800 Subject: [PATCH 2/8] Added more pertinent fields --- src/coreclr/gc/gc.cpp | 51 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 5263a5aebdc63c..473ba6b11b9f0b 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -17142,15 +17142,60 @@ void gc_heap::handle_oom (oom_reason reason, size_t alloc_size, fgm_result.fgm = fgm_no_failure; #ifdef FEATURE_EVENT_TRACE + // 1. Memory Limit + // total_physical_mem + /* + uint64_t gc_heap::total_physical_mem = 0; + uint64_t gc_heap::entry_available_physical_mem = 0; + size_t gc_heap::heap_hard_limit = 0; + size_t gc_heap::heap_hard_limit_oh[total_oh_count]; + */ + + // 2. Available Physical Memory. + uint32_t memory_load = 0; + uint64_t available_physical = 0; + uint64_t available_page_file = 0; + get_memory_info (&memory_load, &available_physical, &available_page_file); + + // 3. Committed Usage. + size_t total_committed = 0; + size_t committed_decommit = 0; + size_t committed_free = 0; + size_t committed_bookkeeping = 0; + size_t new_current_total_committed; + size_t new_current_total_committed_bookkeeping; + size_t new_committed_by_oh[recorded_committed_bucket_counts]; + compute_committed_bytes(total_committed, committed_decommit, committed_free, + committed_bookkeeping, new_current_total_committed, new_current_total_committed_bookkeeping, + new_committed_by_oh); + size_t total_committed_in_use = new_committed_by_oh[soh] + new_committed_by_oh[loh] + new_committed_by_oh[poh]; + + // 4. Fragmentation. + // get_total_gen_fragmentation. + // get_total_fragmentation. + size_t total_fragmentation = get_total_fragmentation(); + + // 5. Is special_sweep_p set? + bool special_sweep_set_p = false; + #ifdef USE_REGIONS + special_sweep_set_p = special_sweep_p; + #endif //USE_REGIONS + GCEventFireOOMDetails_V1 ( (size_t)oom_info.gc_index, - (uint8_t)*oom_info.allocated, - (uint8_t)*oom_info.reserved, + (uint8_t)(*oom_info.allocated), + (uint8_t)(*oom_info.reserved), (size_t)oom_info.alloc_size, (uint8_t)oom_info.reason, (uint8_t)oom_info.fgm, (size_t)oom_info.size, - (size_t)oom_info.available_pagefile_mb); + (size_t)oom_info.available_pagefile_mb, // TODO: Consider removing in favor of the get_memory_info one. + (uint8_t)oom_info.loh_p, + (uint64_t)gc_heap::total_physical_mem, + (uint64_t)available_physical, + (size_t)total_committed, + (size_t)total_fragmentation, + (uint8_t)special_sweep_set_p); #endif //FEATURE_EVENT_TRACE // Break early - before the more_space_lock is release so no other threads From f9eaaa6e5ab306213ea73f9e2ab6220de07ad6e1 Mon Sep 17 00:00:00 2001 From: Mukund Raghav Sharma Date: Wed, 11 Dec 2024 07:37:56 -0800 Subject: [PATCH 3/8] Final event struct --- src/coreclr/gc/gc.cpp | 45 ++++--------------------------------------- 1 file changed, 4 insertions(+), 41 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 473ba6b11b9f0b..e360eb3963ffe3 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -17142,45 +17142,12 @@ void gc_heap::handle_oom (oom_reason reason, size_t alloc_size, fgm_result.fgm = fgm_no_failure; #ifdef FEATURE_EVENT_TRACE - // 1. Memory Limit - // total_physical_mem - /* - uint64_t gc_heap::total_physical_mem = 0; - uint64_t gc_heap::entry_available_physical_mem = 0; - size_t gc_heap::heap_hard_limit = 0; - size_t gc_heap::heap_hard_limit_oh[total_oh_count]; - */ - - // 2. Available Physical Memory. - uint32_t memory_load = 0; + // Get Memory Load. + uint32_t memory_load = 0; // Include. uint64_t available_physical = 0; uint64_t available_page_file = 0; get_memory_info (&memory_load, &available_physical, &available_page_file); - // 3. Committed Usage. - size_t total_committed = 0; - size_t committed_decommit = 0; - size_t committed_free = 0; - size_t committed_bookkeeping = 0; - size_t new_current_total_committed; - size_t new_current_total_committed_bookkeeping; - size_t new_committed_by_oh[recorded_committed_bucket_counts]; - compute_committed_bytes(total_committed, committed_decommit, committed_free, - committed_bookkeeping, new_current_total_committed, new_current_total_committed_bookkeeping, - new_committed_by_oh); - size_t total_committed_in_use = new_committed_by_oh[soh] + new_committed_by_oh[loh] + new_committed_by_oh[poh]; - - // 4. Fragmentation. - // get_total_gen_fragmentation. - // get_total_fragmentation. - size_t total_fragmentation = get_total_fragmentation(); - - // 5. Is special_sweep_p set? - bool special_sweep_set_p = false; - #ifdef USE_REGIONS - special_sweep_set_p = special_sweep_p; - #endif //USE_REGIONS - GCEventFireOOMDetails_V1 ( (size_t)oom_info.gc_index, (uint8_t)(*oom_info.allocated), @@ -17189,13 +17156,9 @@ void gc_heap::handle_oom (oom_reason reason, size_t alloc_size, (uint8_t)oom_info.reason, (uint8_t)oom_info.fgm, (size_t)oom_info.size, - (size_t)oom_info.available_pagefile_mb, // TODO: Consider removing in favor of the get_memory_info one. (uint8_t)oom_info.loh_p, - (uint64_t)gc_heap::total_physical_mem, - (uint64_t)available_physical, - (size_t)total_committed, - (size_t)total_fragmentation, - (uint8_t)special_sweep_set_p); + (uint32_t)memory_load); + #endif //FEATURE_EVENT_TRACE // Break early - before the more_space_lock is release so no other threads From 1d34ff53cc74fb33da6c9d0d4b87b2c1903d9dc7 Mon Sep 17 00:00:00 2001 From: Mukund Raghav Sharma Date: Wed, 11 Dec 2024 07:48:35 -0800 Subject: [PATCH 4/8] Reverted unintended code --- src/coreclr/gc/gc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index e360eb3963ffe3..dc8ec0b1aeb15f 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -3293,8 +3293,8 @@ void gc_heap::fire_pevents() // because EE is not suspended then. On entry it's fired after the GCStart event, on exit it's fire before the GCStop event. void gc_heap::fire_committed_usage_event() { - if (!EVENT_ENABLED (GCMarkWithType)) return; #ifdef FEATURE_EVENT_TRACE + if (!EVENT_ENABLED (GCMarkWithType)) return; size_t total_committed = 0; size_t committed_decommit = 0; From e94dbbe73ac2d7b7a1cd5a34c2fe1fa594830cc1 Mon Sep 17 00:00:00 2001 From: Mukund Raghav Sharma Date: Wed, 11 Dec 2024 07:53:23 -0800 Subject: [PATCH 5/8] Cleanup comment --- src/coreclr/gc/gc.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index dc8ec0b1aeb15f..7892858932d295 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -17142,8 +17142,9 @@ void gc_heap::handle_oom (oom_reason reason, size_t alloc_size, fgm_result.fgm = fgm_no_failure; #ifdef FEATURE_EVENT_TRACE - // Get Memory Load. - uint32_t memory_load = 0; // Include. + + // Include just the memory_load. + uint32_t memory_load = 0; uint64_t available_physical = 0; uint64_t available_page_file = 0; get_memory_info (&memory_load, &available_physical, &available_page_file); From 987d07ec23e2f9afae9de5b6cbd34133f28c5cb8 Mon Sep 17 00:00:00 2001 From: Mukund Raghav Sharma Date: Wed, 11 Dec 2024 18:43:38 -0800 Subject: [PATCH 6/8] Test for build break --- src/coreclr/gc/gc.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 7892858932d295..4bb0d45cf9e70e 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -17150,13 +17150,13 @@ void gc_heap::handle_oom (oom_reason reason, size_t alloc_size, get_memory_info (&memory_load, &available_physical, &available_page_file); GCEventFireOOMDetails_V1 ( - (size_t)oom_info.gc_index, + (uint64_t)oom_info.gc_index, (uint8_t)(*oom_info.allocated), (uint8_t)(*oom_info.reserved), - (size_t)oom_info.alloc_size, + (uint64_t)oom_info.alloc_size, (uint8_t)oom_info.reason, (uint8_t)oom_info.fgm, - (size_t)oom_info.size, + (uint64_t)oom_info.size, (uint8_t)oom_info.loh_p, (uint32_t)memory_load); From 39850603cc9d05018afa2ac5f77a0166aa1f074d Mon Sep 17 00:00:00 2001 From: Mukund Raghav Sharma Date: Mon, 13 Jan 2025 14:05:22 -0800 Subject: [PATCH 7/8] Fixed event and added one more case where we oom for uoh --- src/coreclr/gc/gc.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 4bb0d45cf9e70e..52272046e37f8c 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -17139,11 +17139,8 @@ void gc_heap::handle_oom (oom_reason reason, size_t alloc_size, oom_info.loh_p = fgm_result.loh_p; add_to_oom_history_per_heap(); - fgm_result.fgm = fgm_no_failure; #ifdef FEATURE_EVENT_TRACE - - // Include just the memory_load. uint32_t memory_load = 0; uint64_t available_physical = 0; uint64_t available_page_file = 0; @@ -17151,17 +17148,17 @@ void gc_heap::handle_oom (oom_reason reason, size_t alloc_size, GCEventFireOOMDetails_V1 ( (uint64_t)oom_info.gc_index, - (uint8_t)(*oom_info.allocated), - (uint8_t)(*oom_info.reserved), (uint64_t)oom_info.alloc_size, (uint8_t)oom_info.reason, (uint8_t)oom_info.fgm, (uint64_t)oom_info.size, (uint8_t)oom_info.loh_p, - (uint32_t)memory_load); - + (uint32_t)memory_load, + (uint64_t)(available_page_file / (1024 * 1024))); #endif //FEATURE_EVENT_TRACE + fgm_result.fgm = fgm_no_failure; + // Break early - before the more_space_lock is release so no other threads // could have allocated on the same heap when OOM happened. if (GCConfig::GetBreakOnOOM()) @@ -19620,6 +19617,7 @@ BOOL gc_heap::allocate_more_space(alloc_context* acontext, size_t size, alloc_heap = balance_heaps_uoh_hard_limit_retry (acontext, size, alloc_generation_number); if (alloc_heap == nullptr || (retry_count++ == UOH_ALLOCATION_RETRY_MAX_COUNT)) { + alloc_heap->handle_oom (oom_loh, size, 0, 0); return false; } } From bdfd60f41a8e5b1c5bf2c2ee629eff21dd266d01 Mon Sep 17 00:00:00 2001 From: Maoni0 Date: Tue, 14 Jan 2025 01:39:45 -0800 Subject: [PATCH 8/8] fgm --- src/coreclr/gc/gc.cpp | 55 +++++++++++++++++++------------- src/coreclr/gc/gcinterface.dac.h | 3 +- src/coreclr/gc/gcpriv.h | 8 ++--- 3 files changed, 39 insertions(+), 27 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 52272046e37f8c..4a5827491a4ca1 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -4054,7 +4054,7 @@ void region_allocator::leave_spin_lock() region_allocator_lock.lock = -1; } -uint8_t* region_allocator::allocate (uint32_t num_units, allocate_direction direction, region_allocator_callback_fn fn) +uint8_t* region_allocator::allocate (uint32_t num_units, allocate_direction direction, region_allocator_callback_fn fn, bool uoh_p) { enter_spin_lock(); @@ -4155,7 +4155,7 @@ uint8_t* region_allocator::allocate (uint32_t num_units, allocate_direction dire total_free_units -= num_units; if (fn != nullptr) { - if (!fn (global_region_left_used)) + if (!fn (global_region_left_used, uoh_p)) { delete_region_impl (alloc); alloc = nullptr; @@ -4186,29 +4186,32 @@ bool region_allocator::allocate_region (int gen_num, size_t size, uint8_t** star uint8_t* alloc = NULL; dprintf (REGIONS_LOG, ("----GET %u-----", num_units)); - alloc = allocate (num_units, direction, fn); + alloc = allocate (num_units, direction, fn, (gen_num >= uoh_start_generation)); *start = alloc; *end = alloc + alloc_size; ret = (alloc != NULL); - gc_etw_segment_type segment_type; - - if (gen_num == loh_generation) - { - segment_type = gc_etw_segment_large_object_heap; - } - else if (gen_num == poh_generation) - { - segment_type = gc_etw_segment_pinned_object_heap; - } - else + if (ret) { - segment_type = gc_etw_segment_small_object_heap; - } + gc_etw_segment_type segment_type; - FIRE_EVENT(GCCreateSegment_V1, (alloc + sizeof (aligned_plug_and_gap)), - size - sizeof (aligned_plug_and_gap), - segment_type); + if (gen_num == loh_generation) + { + segment_type = gc_etw_segment_large_object_heap; + } + else if (gen_num == poh_generation) + { + segment_type = gc_etw_segment_pinned_object_heap; + } + else + { + segment_type = gc_etw_segment_small_object_heap; + } + + FIRE_EVENT(GCCreateSegment_V1, (alloc + sizeof (aligned_plug_and_gap)), + size - sizeof (aligned_plug_and_gap), + segment_type); + } return ret; } @@ -9280,7 +9283,7 @@ void gc_heap::get_card_table_element_layout (uint8_t* start, uint8_t* end, size_ } #ifdef USE_REGIONS -bool gc_heap::on_used_changed (uint8_t* new_used) +bool gc_heap::on_used_changed (uint8_t* new_used, bool uoh_p) { #if defined(WRITE_BARRIER_CHECK) && !defined (SERVER_GC) if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_BARRIERCHECK) @@ -9342,7 +9345,7 @@ bool gc_heap::on_used_changed (uint8_t* new_used) dprintf (REGIONS_LOG, ("bookkeeping_covered_committed = %p", bookkeeping_covered_committed)); dprintf (REGIONS_LOG, ("new_bookkeeping_covered_committed = %p", new_bookkeeping_covered_committed)); - if (inplace_commit_card_table (bookkeeping_covered_committed, new_bookkeeping_covered_committed)) + if (inplace_commit_card_table (bookkeeping_covered_committed, new_bookkeeping_covered_committed, uoh_p)) { bookkeeping_covered_committed = new_bookkeeping_covered_committed; break; @@ -9443,7 +9446,7 @@ bool gc_heap::get_card_table_commit_layout (uint8_t* from, uint8_t* to, return true; } -bool gc_heap::inplace_commit_card_table (uint8_t* from, uint8_t* to) +bool gc_heap::inplace_commit_card_table (uint8_t* from, uint8_t* to, bool uoh_p=false) { dprintf (REGIONS_LOG, ("inplace_commit_card_table(%p, %p), size = %zd", from, to, to - from)); @@ -9467,6 +9470,7 @@ bool gc_heap::inplace_commit_card_table (uint8_t* from, uint8_t* to) succeed = virtual_commit (commit_begins[i], commit_sizes[i], recorded_committed_bookkeeping_bucket); if (!succeed) { + set_fgm_result (fgm_commit_table, commit_sizes[i], uoh_p); failed_commit = i; break; } @@ -15593,6 +15597,10 @@ BOOL gc_heap::grow_heap_segment (heap_segment* seg, uint8_t* high_address, bool* (heap_segment_committed (seg) <= heap_segment_decommit_target (seg))); #endif //MULTIPLE_HEAPS && !USE_REGIONS } + else + { + fgm_result.set_fgm (fgm_commit_heap, c_size, heap_segment_uoh_p (seg)); + } return !!ret; } @@ -35205,6 +35213,8 @@ heap_segment* gc_heap::allocate_new_region (gc_heap* hp, int gen_num, bool uoh_p if (!allocated_p) { + hp->fgm_result.set_fgm (fgm_reserve_segment, size, uoh_p); + return 0; } @@ -35214,6 +35224,7 @@ heap_segment* gc_heap::allocate_new_region (gc_heap* hp, int gen_num, bool uoh_p if (res == nullptr) { + hp->fgm_result.set_fgm (fgm_commit_segment_beg, SEGMENT_INITIAL_COMMIT, uoh_p); global_region_allocator.delete_region (start); } diff --git a/src/coreclr/gc/gcinterface.dac.h b/src/coreclr/gc/gcinterface.dac.h index b3be7b09cee60f..9516fbb285a05a 100644 --- a/src/coreclr/gc/gcinterface.dac.h +++ b/src/coreclr/gc/gcinterface.dac.h @@ -157,7 +157,8 @@ enum failure_get_memory fgm_commit_segment_beg = 2, fgm_commit_eph_segment = 3, fgm_grow_table = 4, - fgm_commit_table = 5 + fgm_commit_table = 5, + fgm_commit_heap = 6 }; // A record of the last OOM that occurred in the GC, with some diff --git a/src/coreclr/gc/gcpriv.h b/src/coreclr/gc/gcpriv.h index 525620a747f3e6..d679987a457e0e 100644 --- a/src/coreclr/gc/gcpriv.h +++ b/src/coreclr/gc/gcpriv.h @@ -2093,7 +2093,7 @@ class gc_heap PER_HEAP_ISOLATED_METHOD void set_fgm_result (failure_get_memory f, size_t s, BOOL loh_p); #ifdef USE_REGIONS - PER_HEAP_ISOLATED_METHOD bool on_used_changed (uint8_t* left); + PER_HEAP_ISOLATED_METHOD bool on_used_changed (uint8_t* left, bool uoh_p); // new_sizes are the logical sizes of the card table elements while // commit_sizes are the physical sizes of the card table elements due to alignment constraints. @@ -2102,7 +2102,7 @@ class gc_heap size_t commit_sizes[total_bookkeeping_elements], size_t new_sizes[total_bookkeeping_elements]); - PER_HEAP_ISOLATED_METHOD bool inplace_commit_card_table (uint8_t* from, uint8_t* to); + PER_HEAP_ISOLATED_METHOD bool inplace_commit_card_table (uint8_t* from, uint8_t* to, bool uoh_p); #else //USE_REGIONS PER_HEAP_ISOLATED_METHOD int grow_brick_card_tables (uint8_t* start, uint8_t* end, @@ -6283,7 +6283,7 @@ enum allocate_direction allocate_backward = -1, }; -typedef bool (*region_allocator_callback_fn)(uint8_t*); +typedef bool (*region_allocator_callback_fn)(uint8_t*, bool); // The big space we reserve for regions is divided into units of region_alignment. // @@ -6339,7 +6339,7 @@ class region_allocator uint8_t* region_address_of (uint32_t* map_index); uint32_t* region_map_index_of (uint8_t* address); - uint8_t* allocate (uint32_t num_units, allocate_direction direction, region_allocator_callback_fn fn); + uint8_t* allocate (uint32_t num_units, allocate_direction direction, region_allocator_callback_fn fn, bool uoh_p); uint8_t* allocate_end (uint32_t num_units, allocate_direction direction); void enter_spin_lock();