From bd38404de1234fbf027fbd749d894c7b1edc1f4b Mon Sep 17 00:00:00 2001
From: Erfan Nourbakhsh <erfan@princeton.edu>
Date: Thu, 8 May 2025 05:26:05 -0400
Subject: [PATCH 1/4] Enable exception diagnostics output in pipetask report

---
 python/lsst/ctrl/mpexec/cli/cmd/commands.py  |  26 +++-
 python/lsst/ctrl/mpexec/cli/script/report.py | 153 +++++++++++++++++--
 2 files changed, 162 insertions(+), 17 deletions(-)

diff --git a/python/lsst/ctrl/mpexec/cli/cmd/commands.py b/python/lsst/ctrl/mpexec/cli/cmd/commands.py
index 7116d351..65b437b6 100644
--- a/python/lsst/ctrl/mpexec/cli/cmd/commands.py
+++ b/python/lsst/ctrl/mpexec/cli/cmd/commands.py
@@ -338,6 +338,22 @@ def update_graph_run(
     "or when using the --force-v2 option, this should be a json file. We will be "
     "deprecating the single-graph-only (QuantumGraphExecutionReport) option soon.",
 )
+@click.option(
+    "--show-exception-diagnostics",
+    is_flag=True,
+    default=False,
+    help="Show exception diagnostics in the report. This is a detailed report of "
+    "the exceptions that occurred during the execution of the pipeline, combined "
+    "with the exposure dimension records.",
+)
+@click.option(
+    "--exception-diagnostics-filename",
+    default="",
+    help="Write to a file with this name a detailed exception report joined with exposure "
+    "dimension records. It's useful for spotting trends in exception types across data IDs. "
+    "Uses Astropy-supported formats based on file extension (e.g., html, csv, ecsv, pandas.json). "
+    "Full list: https://docs.astropy.org/en/stable/io/unified.html",
+)
 @click.option("--logs/--no-logs", default=True, help="Get butler log datasets for extra information.")
 @click.option(
     "--brief",
@@ -384,6 +400,8 @@ def report(
     collections: Sequence[str] | None,
     where: str,
     full_output_filename: str = "",
+    exception_diagnostics_filename: str = "",
+    show_exception_diagnostics: bool = False,
     logs: bool = True,
     brief: bool = False,
     curse_failed_logs: bool = False,
@@ -422,6 +440,8 @@ def report(
             collections,
             where,
             full_output_filename,
+            exception_diagnostics_filename,
+            show_exception_diagnostics,
             logs,
             brief,
             curse_failed_logs,
@@ -450,7 +470,9 @@ def report(
     " also printed to the screen when using the --full-output-filename option.",
 )
 def aggregate_reports(
-    filenames: Sequence[str], full_output_filename: str | None, brief: bool = False
+    filenames: Sequence[str],
+    full_output_filename: str | None,
+    brief: bool = False,
 ) -> None:
     """Aggregate pipetask report output on disjoint data-id groups into one
     Summary over common tasks and datasets. Intended for use when the same
@@ -466,4 +488,4 @@ def aggregate_reports(
     FILENAMES are the space-separated paths to json file output created by
     pipetask report.
     """
-    script.aggregate_reports(filenames, full_output_filename, brief)
+    script.aggregate_reports(filenames, full_output_filename, brief=brief)
diff --git a/python/lsst/ctrl/mpexec/cli/script/report.py b/python/lsst/ctrl/mpexec/cli/script/report.py
index dce4adfb..ada9ebfe 100644
--- a/python/lsst/ctrl/mpexec/cli/script/report.py
+++ b/python/lsst/ctrl/mpexec/cli/script/report.py
@@ -28,6 +28,7 @@
 from collections.abc import Sequence
 from typing import Literal
 
+from astropy.io.registry import get_formats
 from astropy.table import Table
 
 from lsst.daf.butler import Butler
@@ -122,6 +123,8 @@ def report_v2(
     collections: Sequence[str] | None,
     where: str,
     full_output_filename: str | None,
+    exception_diagnostics_filename: str | None,
+    show_exception_diagnostics: bool = False,
     logs: bool = True,
     brief: bool = False,
     curse_failed_logs: bool = False,
@@ -141,9 +144,9 @@ def report_v2(
     butler_config : `str`
         The Butler used for this report. This should match the Butler used for
         the run associated with the executed quantum graph.
-    qgraph_uris : `Sequence` [`str`]
+    qgraph_uris : `Sequence` of `str`
         One or more uris to the serialized Quantum Graph(s).
-    collections : `Sequence` [`str`] | None`
+    collections : `Sequence` of `str` or `None`
         Collection(s) associated with said graphs/processing. For use in
         `lsst.daf.butler.registry.queryDatasets` if paring down the query would
         be useful.
@@ -155,14 +158,27 @@ def report_v2(
         tools such as the ones used by Campaign Management software and pilots,
         and for searching and counting specific kinds or instances of failures.
         This option will also print a "brief" (counts-only) summary to stdout.
-    logs : `bool`
+    exception_diagnostics_filename : `str`
+        Output the exception diagnostics into an Astropy-supported format
+        based on file extension (e.g., html, csv, ecsv, pandas.json).
+        Full list: https://docs.astropy.org/en/stable/io/unified.html.
+        The "html" and "htm" formats are recommended as they'll be made
+        interactive (searchable and sortable) using Astropy's JSViewer writer.
+        This is a troubleshooting-oriented report of the exceptions combined
+        with the exposure dimension records.
+    show_exception_diagnostics : `bool`
+        Show exception diagnostics on stdout. This will be a truncated view of
+        the exception diagnostics table, if the table is too large. To save the
+        full table to a file, use the ``exception_diagnostics_filename``
+        argument.
+    logs : `bool`, optional
         Store error messages from Butler logs associated with failed quanta if
         `True`.
-    brief : `bool`
+    brief : `bool`, optional
         Only display short (counts-only) summary on stdout. This includes
         counts and not error messages or data_ids (similar to BPS report). This
         option will still report all `cursed` datasets and `wonky` quanta.
-    curse_failed_logs : `bool`
+    curse_failed_logs : `bool`, optional
         Mark log datasets as `cursed` if they are published in the final output
         collection. Note that a campaign-level collection must be used here for
         `collections` if `curse_failed_logs` is `True`; if
@@ -183,7 +199,7 @@ def report_v2(
         This should reduce the number of database operations.
     n_cores : `int`, optional
         Number of cores for metadata and log reads.
-    view_graph : `bool`
+    view_graph : `bool`, optional
         Display a graph representation of `QuantumProvenanceGraph.Summary` on
         stdout instead of the default plain-text summary. Pipeline graph nodes
         are then annotated with their status. This is a useful way to visualize
@@ -228,8 +244,22 @@ def report_v2(
             status_annotator=status_annotator,
             status_options=status_options,
         )
+
+        if full_output_filename or exception_diagnostics_filename or show_exception_diagnostics:
+            print(
+                "Warning: you have requested to write the summary to a file or show or save exception "
+                "diagnostics, but this will not be done when viewing the graph. Please run again without "
+                "--view_graph to get those outputs."
+            )
     else:
-        print_summary(summary, full_output_filename, brief)
+        print_summary(
+            summary,
+            full_output_filename,
+            exception_diagnostics_filename=exception_diagnostics_filename,
+            show_exception_diagnostics=show_exception_diagnostics,
+            brief=brief,
+            butler=butler if show_exception_diagnostics or exception_diagnostics_filename else None,
+        )
 
 
 def aggregate_reports(
@@ -242,15 +272,15 @@ def aggregate_reports(
 
     Parameters
     ----------
-    filenames : `Sequence[str]`
+    filenames : `Sequence` of `str`
         The paths to the JSON files produced by `pipetask report` (note: this
         is only compatible with the multi-graph or `--force-v2` option). These
         files correspond to the `QuantumProvenanceGraph.Summary` objects which
         are produced for each group.
-    full_output_filename : `str | None`
+    full_output_filename : `str` or `None`
         The name of the JSON file in which to store the aggregate report, if
         passed. This is passed to `print_summary` at the end of this function.
-    brief : `bool = False`
+    brief : `bool`, optional
         Only display short (counts-only) summary on stdout. This includes
         counts and not error messages or data_ids (similar to BPS report).
         This option will still report all `cursed` datasets and `wonky`
@@ -262,10 +292,54 @@ def aggregate_reports(
             model = Summary.model_validate_json(f.read())
             summaries.extend([model])
     result = Summary.aggregate(summaries)
-    print_summary(result, full_output_filename, brief)
+    print_summary(result, full_output_filename, brief=brief)
+
+
+def infer_write_format_from_filename(filename: str, default: str = "jsviewer") -> str:
+    """Guess the appropriate Astropy `Table.write` format from a filename.
+
+    Parameters
+    ----------
+    filename : `str`
+        The name of the file to be written.
+    default : `str`, optional
+        The default format to return if no match is found. Default is
+        "jsviewer" for interactive HTML output.
+
+    Returns
+    -------
+    format : str
+        A valid format string that can be passed to `Table.write(format=...)`.
+    """
+    formats = get_formats(Table)
+    write_formats = {str(f["Format"]).lower() for f in formats if f["Write"]}
+
+    filename = filename.lower()
+    parts = filename.split(".")
+
+    # Try full two-part suffix (e.g., "pandas.json").
+    if len(parts) >= 2:
+        suffix = ".".join(parts[-2:])
+        if suffix in write_formats:
+            return suffix
+
+    # Try just the last extension (e.g., "ecsv").
+    if len(parts) >= 1:
+        ext = parts[-1]
+        if ext in write_formats:
+            return ext
 
+    return default
 
-def print_summary(summary: Summary, full_output_filename: str | None, brief: bool = False) -> None:
+
+def print_summary(
+    summary: Summary,
+    full_output_filename: str | None,
+    exception_diagnostics_filename: str | None = None,
+    show_exception_diagnostics: bool = False,
+    brief: bool = False,
+    butler: Butler | None = None,
+) -> None:
     """Take a `QuantumProvenanceGraph.Summary` object and write it to a file
     and/or the screen.
 
@@ -274,15 +348,64 @@ def print_summary(summary: Summary, full_output_filename: str | None, brief: boo
     summary : `QuantumProvenanceGraph.Summary`
         This `Pydantic` model contains all the information derived from the
         `QuantumProvenanceGraph`.
-    full_output_filename : `str | None`
+    full_output_filename : `str` or `None`
         Name of the JSON file in which to store summary information, if
         passed.
+    exception_diagnostics_filename : `str` or `None`
+        Name of the file to write exception diagnostics to, if provided. This
+        report combines exception information with exposure dimension records,
+        useful for troubleshooting. The output format is inferred from the
+        filename extension. If the extension is "htm", "html", or unrecognized,
+        the format defaults to "jsviewer" to generate an interactive,
+        searchable and sortable HTML table.
+    show_exception_diagnostics : `bool`
+        Display exception diagnostics on stdout. If the table is too large,
+        only a truncated version will be shown. To save the complete table, use
+        the ``exception_diagnostics_filename`` argument.
     brief : `bool`
         Only display short (counts-only) summary on stdout. This includes
         counts and not error messages or data_ids (similar to BPS report).
-        Ignored (considered `False`) if ``full_output_filename`` is passed.
+        Ignored (considered `False`) if ``full_output_filename`` or
+        ``exception_diagnostics_filename`` is passed.
+    butler : `lsst.daf.butler.Butler` or `None`
+        Butler to use for the report. This is needed to get the exposure
+        dimension records for the exception diagnostics.
     """
-    summary.pprint(brief=(brief or bool(full_output_filename)))
+    summary.pprint(
+        brief=(brief or bool(full_output_filename) or bool(exception_diagnostics_filename)),
+        show_exception_diagnostics=show_exception_diagnostics,
+        butler=butler,
+    )
     if full_output_filename:
+        # Write the full summary to a file.
         with open(full_output_filename, "w") as stream:
             stream.write(summary.model_dump_json(indent=2))
+        print(f"Full summary written to {full_output_filename}")
+
+    if exception_diagnostics_filename:
+        exception_diagnostics_table = summary.make_exception_diagnostics_table(butler)
+
+        # Check the file extension to determine the format.
+        fmt = infer_write_format_from_filename(exception_diagnostics_filename)
+
+        kwargs: dict[str, bool | int | str] = {"overwrite": True}
+
+        if fmt in ("htm", "html"):
+            # Use the interactive HTML format for HTML files.
+            fmt = "jsviewer"
+
+        kwargs["format"] = fmt
+
+        if fmt == "jsviewer":
+            # Set the maximum number of lines to the length of the table.
+            # Otherwise it'll default to 5000 lines.
+            kwargs["max_lines"] = len(exception_diagnostics_table)
+
+            # If the filename doesn't end with ".html" or ".htm", add ".html".
+            # This is needed for the file to be recognized as an HTML file.
+            if not exception_diagnostics_filename.endswith((".html", ".htm")):
+                exception_diagnostics_filename += ".html"
+
+        # Write the exception diagnostics table to a file.
+        exception_diagnostics_table.write(exception_diagnostics_filename, **kwargs)
+        print(f"Exception diagnostics written to {exception_diagnostics_filename}")

From 81fd77ac51db6a1795c5ec9d0ecac9b4c30b273a Mon Sep 17 00:00:00 2001
From: Erfan Nourbakhsh <erfan@princeton.edu>
Date: Sun, 11 May 2025 17:03:24 -0400
Subject: [PATCH 2/4] Ignore mypy abstract class error

---
 python/lsst/ctrl/mpexec/cli/pipetask.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/lsst/ctrl/mpexec/cli/pipetask.py b/python/lsst/ctrl/mpexec/cli/pipetask.py
index fb1d41d7..f369c46a 100644
--- a/python/lsst/ctrl/mpexec/cli/pipetask.py
+++ b/python/lsst/ctrl/mpexec/cli/pipetask.py
@@ -43,7 +43,7 @@ class PipetaskCLI(LoaderCLI):
     localCmdPkg = "lsst.ctrl.mpexec.cli.cmd"
 
 
-@click.command(cls=PipetaskCLI, context_settings=dict(help_option_names=["-h", "--help"]))
+@click.command(cls=PipetaskCLI, context_settings=dict(help_option_names=["-h", "--help"]))  # type: ignore
 @log_level_option()
 @long_log_option()
 @log_file_option()

From f6f6b73f7d81174085b00334a69ced8953299783 Mon Sep 17 00:00:00 2001
From: Erfan Nourbakhsh <erfan@princeton.edu>
Date: Mon, 12 May 2025 16:17:20 -0400
Subject: [PATCH 3/4] Add release note for exception diagnostics output in
 pipetask report

---
 doc/changes/DM-50550.feature.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 doc/changes/DM-50550.feature.md

diff --git a/doc/changes/DM-50550.feature.md b/doc/changes/DM-50550.feature.md
new file mode 100644
index 00000000..8985a6c7
--- /dev/null
+++ b/doc/changes/DM-50550.feature.md
@@ -0,0 +1 @@
+Enable exception diagnostics output in `pipetask report`.
\ No newline at end of file

From 85232cc19fa801032f4b6e94beff81577dc1fd43 Mon Sep 17 00:00:00 2001
From: Erfan Nourbakhsh <erfan@princeton.edu>
Date: Tue, 13 May 2025 16:18:17 -0400
Subject: [PATCH 4/4] Reuse exception diagnostics table if already available

---
 python/lsst/ctrl/mpexec/cli/script/report.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/lsst/ctrl/mpexec/cli/script/report.py b/python/lsst/ctrl/mpexec/cli/script/report.py
index ada9ebfe..10af9872 100644
--- a/python/lsst/ctrl/mpexec/cli/script/report.py
+++ b/python/lsst/ctrl/mpexec/cli/script/report.py
@@ -371,10 +371,11 @@ def print_summary(
         Butler to use for the report. This is needed to get the exposure
         dimension records for the exception diagnostics.
     """
-    summary.pprint(
+    exception_diagnostics_table = summary.pprint(
         brief=(brief or bool(full_output_filename) or bool(exception_diagnostics_filename)),
         show_exception_diagnostics=show_exception_diagnostics,
         butler=butler,
+        return_exception_diagnostics_table=show_exception_diagnostics and exception_diagnostics_filename,
     )
     if full_output_filename:
         # Write the full summary to a file.
@@ -383,7 +384,10 @@ def print_summary(
         print(f"Full summary written to {full_output_filename}")
 
     if exception_diagnostics_filename:
-        exception_diagnostics_table = summary.make_exception_diagnostics_table(butler)
+        # Create the exception diagnostics table if it wasn't created in the
+        # previous step due to `show_exception_diagnostics` being False.
+        if not exception_diagnostics_table:
+            exception_diagnostics_table = summary.make_exception_diagnostics_table(butler)
 
         # Check the file extension to determine the format.
         fmt = infer_write_format_from_filename(exception_diagnostics_filename)