From c33785187ccb0e4722f952255ccb8fa97580fd3b Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Tue, 28 Feb 2023 17:00:00 -0600 Subject: [PATCH 1/7] init with https://code.activestate.com/recipes/523004-find-cyclical-references/ --- pytools/debug.py | 70 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/pytools/debug.py b/pytools/debug.py index 71e08d2e..c314c0af 100644 --- a/pytools/debug.py +++ b/pytools/debug.py @@ -139,6 +139,76 @@ def is_excluded(o): # }}} +# {{{ Find circular references + +# https://code.activestate.com/recipes/523004-find-cyclical-references/ + +import gc +from types import FrameType + + +def print_cycles(objects, outstream=sys.stdout, show_progress=True): + """ + objects: A list of objects to find cycles in. It is often useful + to pass in gc.garbage to find the cycles that are + preventing some objects from being garbage collected. + outstream: The stream for output. + show_progress: If True, print the number of objects reached as they are + found. 
+ """ + def print_path(path): + for i, step in enumerate(path): + # next_obj "wraps around" + next_obj = path[(i + 1) % len(path)] + + outstream.write(" %s -- " % str(type(step))) + if isinstance(step, dict): + for key, val in step.items(): + if val is next_obj: + outstream.write("[%s]" % repr(key)) + break + if key is next_obj: + outstream.write("[key] = %s" % repr(val)) + break + elif isinstance(step, list): + outstream.write("[%d]" % step.index(next_obj)) + elif isinstance(step, tuple): + outstream.write("[%d]" % step.index(next_obj)) + else: + outstream.write(repr(step)) + outstream.write(" ->\n") + outstream.write("\n") + + def recurse(obj, start, all_objs, current_path): + if show_progress: + outstream.write("%d\r" % len(all_objs)) + + all_objs.add(id(obj)) + + referents = gc.get_referents(obj) + for referent in referents: + # If we've found our way back to the start, this is + # a cycle, so print it out + if referent is start: + print_path(current_path) + + # Don't go back through the original list of objects, or + # through temporary references to the object, since those + # are just an artifact of the cycle detector itself. 
+ elif referent is objects or isinstance(referent, FrameType): + continue + + # We haven't seen this object before, so recurse + elif id(referent) not in all_objs: + recurse(referent, start, all_objs, current_path + [obj]) + + for obj in objects: + outstream.write(f"Examining: {obj}\n") + recurse(obj, obj, set(), []) + +# }}} + + # {{{ interactive shell def get_shell_hist_filename(): From 15e822edea54751ef9a27d9782587aaf75388597 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 1 Mar 2023 14:52:50 -0600 Subject: [PATCH 2/7] make it a proper function --- pytools/debug.py | 63 ++++++++++++++++-------------------------------- 1 file changed, 21 insertions(+), 42 deletions(-) diff --git a/pytools/debug.py b/pytools/debug.py index c314c0af..e3d54620 100644 --- a/pytools/debug.py +++ b/pytools/debug.py @@ -1,6 +1,7 @@ import sys from pytools import memoize +from typing import List, Set, Optional, Collection # {{{ debug files ------------------------------------------------------------- @@ -143,54 +144,26 @@ def is_excluded(o): # https://code.activestate.com/recipes/523004-find-cyclical-references/ -import gc -from types import FrameType - - -def print_cycles(objects, outstream=sys.stdout, show_progress=True): +def get_object_cycles(objects: Collection[object]) -> List[List[object]]: """ - objects: A list of objects to find cycles in. It is often useful - to pass in gc.garbage to find the cycles that are - preventing some objects from being garbage collected. - outstream: The stream for output. - show_progress: If True, print the number of objects reached as they are - found. + :param objects: A collection of objects to find cycles in. It is often + useful to pass in gc.garbage to find the cycles that are preventing some + objects from being garbage collected. 
""" - def print_path(path): - for i, step in enumerate(path): - # next_obj "wraps around" - next_obj = path[(i + 1) % len(path)] - - outstream.write(" %s -- " % str(type(step))) - if isinstance(step, dict): - for key, val in step.items(): - if val is next_obj: - outstream.write("[%s]" % repr(key)) - break - if key is next_obj: - outstream.write("[key] = %s" % repr(val)) - break - elif isinstance(step, list): - outstream.write("[%d]" % step.index(next_obj)) - elif isinstance(step, tuple): - outstream.write("[%d]" % step.index(next_obj)) - else: - outstream.write(repr(step)) - outstream.write(" ->\n") - outstream.write("\n") - - def recurse(obj, start, all_objs, current_path): - if show_progress: - outstream.write("%d\r" % len(all_objs)) - + def recurse(obj: object, start: object, all_objs: Set[object], + current_path: List[object]) -> Optional[List[object]]: all_objs.add(id(obj)) + import gc + from types import FrameType + referents = gc.get_referents(obj) + for referent in referents: # If we've found our way back to the start, this is # a cycle, so print it out if referent is start: - print_path(current_path) + return current_path # Don't go back through the original list of objects, or # through temporary references to the object, since those @@ -200,11 +173,17 @@ def recurse(obj, start, all_objs, current_path): # We haven't seen this object before, so recurse elif id(referent) not in all_objs: - recurse(referent, start, all_objs, current_path + [obj]) + return recurse(referent, start, all_objs, current_path + [obj]) + return None + + res = [] for obj in objects: - outstream.write(f"Examining: {obj}\n") - recurse(obj, obj, set(), []) + r = recurse(obj, obj, set(), []) + if r: + res.append(r) + + return res # }}} From 221eedfd9e5bfe9669db513d04363700dbf93daa Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 1 Mar 2023 14:53:02 -0600 Subject: [PATCH 3/7] add test --- test/test_debug.py | 49 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 
insertions(+) create mode 100644 test/test_debug.py diff --git a/test/test_debug.py b/test/test_debug.py new file mode 100644 index 00000000..0c43c9ce --- /dev/null +++ b/test/test_debug.py @@ -0,0 +1,49 @@ +__copyright__ = "Copyright (C) 2023 University of Illinois Board of Trustees" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + + +def test_get_object_cycles(): + from pytools.debug import get_object_cycles + assert len(get_object_cycles([])) == 0 + + a = {} + assert len(get_object_cycles([a])) == 0 + + b = {"a": a} + assert len(get_object_cycles([b])) == 0 + assert len(get_object_cycles([a, b])) == 0 + + a["b"] = b + + assert len(get_object_cycles([a, b])) == 2 + assert len(get_object_cycles([a, b])) == 2 + assert len(get_object_cycles([a])) == 1 + + a = {} + + assert len(get_object_cycles([a])) == 0 + + b = [42, 4] + a = [1, 2, 3, 4, 5, b] + b.append(a) + + assert len(get_object_cycles([a, b])) == 2 From e4bae2aa7f9d397e83a1f228d9544bbd7fb297c4 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 1 Mar 2023 16:36:07 -0600 Subject: [PATCH 4/7] fix isort, pypy --- pytools/debug.py | 2 +- test/test_debug.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pytools/debug.py b/pytools/debug.py index e3d54620..eb80ad6a 100644 --- a/pytools/debug.py +++ b/pytools/debug.py @@ -1,7 +1,7 @@ import sys +from typing import Collection, List, Optional, Set from pytools import memoize -from typing import List, Set, Optional, Collection # {{{ debug files ------------------------------------------------------------- diff --git a/test/test_debug.py b/test/test_debug.py index 0c43c9ce..15e5efcf 100644 --- a/test/test_debug.py +++ b/test/test_debug.py @@ -20,7 +20,12 @@ THE SOFTWARE. 
""" +import sys +import pytest + + +@pytest.mark.skipif(hasattr(sys, "pypy_version_info"), reason="not support on PYPY") def test_get_object_cycles(): from pytools.debug import get_object_cycles assert len(get_object_cycles([])) == 0 From 6efeb9a34e61b17cea0a4b91d57970f6a1ff4bfa Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Tue, 14 Mar 2023 15:34:57 -0500 Subject: [PATCH 5/7] simplify --- pytools/debug.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/pytools/debug.py b/pytools/debug.py index eb80ad6a..ae41aed8 100644 --- a/pytools/debug.py +++ b/pytools/debug.py @@ -1,5 +1,5 @@ import sys -from typing import Collection, List, Optional, Set +from typing import Collection, List, Set from pytools import memoize @@ -151,7 +151,7 @@ def get_object_cycles(objects: Collection[object]) -> List[List[object]]: objects from being garbage collected. """ def recurse(obj: object, start: object, all_objs: Set[object], - current_path: List[object]) -> Optional[List[object]]: + current_path: List[object]) -> None: all_objs.add(id(obj)) import gc @@ -161,9 +161,10 @@ def recurse(obj: object, start: object, all_objs: Set[object], for referent in referents: # If we've found our way back to the start, this is - # a cycle, so print it out + # a cycle, so return it if referent is start: - return current_path + res.append(current_path) + return # Don't go back through the original list of objects, or # through temporary references to the object, since those @@ -173,15 +174,11 @@ def recurse(obj: object, start: object, all_objs: Set[object], # We haven't seen this object before, so recurse elif id(referent) not in all_objs: - return recurse(referent, start, all_objs, current_path + [obj]) - - return None + recurse(referent, start, all_objs, current_path + [obj]) - res = [] + res: List[List[object]] = [] for obj in objects: - r = recurse(obj, obj, set(), []) - if r: - res.append(r) + recurse(obj, obj, set(), []) return res From 
7f0cd0b60005df41b0bb51d9b4257b64f9cc3ee4 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Tue, 14 Mar 2023 15:38:16 -0500 Subject: [PATCH 6/7] test pypy --- test/test_debug.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/test/test_debug.py b/test/test_debug.py index 15e5efcf..0c43c9ce 100644 --- a/test/test_debug.py +++ b/test/test_debug.py @@ -20,12 +20,7 @@ THE SOFTWARE. """ -import sys -import pytest - - -@pytest.mark.skipif(hasattr(sys, "pypy_version_info"), reason="not support on PYPY") def test_get_object_cycles(): from pytools.debug import get_object_cycles assert len(get_object_cycles([])) == 0 From 75a567d3464a69e730314b676bca5b52eea0e1bc Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Tue, 14 Mar 2023 16:02:58 -0500 Subject: [PATCH 7/7] add to doc --- doc/debug.rst | 1 + doc/index.rst | 1 + pytools/debug.py | 56 ++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 54 insertions(+), 4 deletions(-) create mode 100644 doc/debug.rst diff --git a/doc/debug.rst b/doc/debug.rst new file mode 100644 index 00000000..1e834b0d --- /dev/null +++ b/doc/debug.rst @@ -0,0 +1 @@ +.. automodule:: pytools.debug diff --git a/doc/index.rst b/doc/index.rst index 1a1e9c44..2be19d8a 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -9,6 +9,7 @@ Welcome to pytools's documentation! 
obj_array persistent_dict graph + debug tag codegen mpi diff --git a/pytools/debug.py b/pytools/debug.py index ae41aed8..688b12cd 100644 --- a/pytools/debug.py +++ b/pytools/debug.py @@ -1,3 +1,36 @@ +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +__doc__ = """ +Debugging helpers +================= + +.. autofunction:: make_unique_filesystem_object +.. autofunction:: open_unique_debug_file +.. autofunction:: refdebug +.. autofunction:: get_object_cycles +.. autofunction:: estimate_memory_usage + +""" + import sys from typing import Collection, List, Set @@ -142,13 +175,28 @@ def is_excluded(o): # {{{ Find circular references -# https://code.activestate.com/recipes/523004-find-cyclical-references/ +# Based on https://code.activestate.com/recipes/523004-find-cyclical-references/ def get_object_cycles(objects: Collection[object]) -> List[List[object]]: """ - :param objects: A collection of objects to find cycles in. 
It is often - useful to pass in gc.garbage to find the cycles that are preventing some - objects from being garbage collected. + Find circular references in *objects*. This can be useful for example to debug + why certain objects need to be freed via garbage collection instead of + reference counting. + + :arg objects: A collection of objects to find cycles in. One way to + obtain a list of objects that may contain cycles from the garbage + collector is the following code:: + + gc.set_debug(gc.DEBUG_SAVEALL) + gc.collect() + gc.set_debug(0) + obj_list = gc.garbage + + from pytools.debug import get_object_cycles + print(get_object_cycles(obj_list)) + + :returns: A :class:`list` in which each element contains a :class:`list` + of objects forming a cycle. """ def recurse(obj: object, start: object, all_objs: Set[object], current_path: List[object]) -> None: