From 4a99fa19c012d78544d9ea04e0c28d377d8bcd06 Mon Sep 17 00:00:00 2001 From: Joshua Thomas Johnson Date: Wed, 26 Nov 2025 10:13:09 +0530 Subject: [PATCH 1/6] Add metadata property to OpenMLBenchmarkSuite for LaTeX export - Add metadata property that returns pandas DataFrame with task and dataset metadata - Implement efficient batch API calls using _list_tasks and list_datasets - Add lazy loading with caching to avoid redundant API calls - Handle edge cases: empty suites, missing datasets, and API errors - Add comprehensive unit tests (7 test cases) - Add example script demonstrating LaTeX export usage Fixes #1126 --- .../Advanced/suite_metadata_latex_export.py | 158 ++++++++++++++ openml/study/study.py | 120 +++++++++++ .../test_benchmark_suite_metadata.py | 197 ++++++++++++++++++ 3 files changed, 475 insertions(+) create mode 100644 examples/Advanced/suite_metadata_latex_export.py create mode 100644 tests/test_study/test_benchmark_suite_metadata.py diff --git a/examples/Advanced/suite_metadata_latex_export.py b/examples/Advanced/suite_metadata_latex_export.py new file mode 100644 index 000000000..b7da7c4b2 --- /dev/null +++ b/examples/Advanced/suite_metadata_latex_export.py @@ -0,0 +1,158 @@ +# %% [markdown] +""" +Example: Exporting Benchmark Suite Metadata to LaTeX + +This example demonstrates how to use the metadata property on OpenMLBenchmarkSuite +to generate LaTeX tables for academic publications. + +The metadata property returns a pandas DataFrame containing both task-level and +dataset-level information, which can be easily exported to LaTeX using pandas' +Styler API. +""" + +# %% +import openml + +# %% [markdown] +# ## Getting Suite Metadata +# +# First, we retrieve a benchmark suite. Here we use OpenML-CC18, a curated suite +# of 72 classification tasks. + +# %% +suite = openml.study.get_suite(99) # OpenML-CC18 +print(f"Suite: {suite.name}") +print(f"Number of tasks: {len(suite.tasks)}") + +# %% [markdown] +# ## Accessing Metadata +# +# The `metadata` property returns a pandas DataFrame with comprehensive +# information about all tasks in the suite. This includes both task-specific +# information (like estimation procedure) and dataset characteristics (like +# number of instances and features). + +# %% +metadata = suite.metadata +print(f"Metadata shape: {metadata.shape}") +print(f"\nFirst few columns: {metadata.columns.tolist()[:10]}") + +# %% [markdown] +# ## Selecting Columns for Publication +# +# For a typical publication table, we might want to include: +# - Dataset name +# - Number of instances +# - Number of features +# - Number of classes +# - Number of missing values + +# %% +# Select relevant columns for the table +columns = [ + "name", + "NumberOfInstances", + "NumberOfFeatures", + "NumberOfClasses", + "NumberOfMissingValues", +] + +# Filter to only include columns that exist in the DataFrame +available_columns = [col for col in columns if col in metadata.columns] +table_data = metadata[available_columns] + +print(f"\nSelected {len(available_columns)} columns") +print(table_data.head()) + +# %% [markdown] +# ## Generating LaTeX Table +# +# We use pandas' Styler API to format and export the table to LaTeX. +# The Styler provides many formatting options for professional-looking tables. + +# %% +# Generate LaTeX table with formatting +latex_table = ( + table_data.style.format( + { + "NumberOfInstances": "{:,}", # Add thousand separators + "NumberOfFeatures": "{:d}", # Integer format + "NumberOfClasses": "{:d}", + "NumberOfMissingValues": "{:d}", + } + ) + .hide(axis="index") # Hide row indices + .to_latex( + caption="Dataset Characteristics for OpenML-CC18", + label="tab:cc18_metadata", + hrules=True, # Add horizontal rules + position="H", # Float position + ) +) + +print(latex_table) + +# %% [markdown] +# ## Saving to File +# +# The LaTeX code can be saved directly to a file for inclusion in your document. + +# %% +# Save to file +with open("suite_metadata.tex", "w", encoding="utf-8") as f: + f.write(latex_table) + +print("LaTeX table saved to 'suite_metadata.tex'") + +# %% [markdown] +# ## Advanced Formatting +# +# For more advanced formatting, you can: +# - Apply conditional formatting +# - Add custom CSS classes +# - Format specific rows or columns +# - Include multi-level headers + +# %% +# Example: Format rows with high number of missing values +def highlight_missing(row): + """Highlight rows with many missing values.""" + if row["NumberOfMissingValues"] > 100: + return ["background-color: #ffcccc"] * len(row) + return [""] * len(row) + + +styled_table = ( + table_data.style.apply(highlight_missing, axis=1) + .format( + { + "NumberOfInstances": "{:,}", + "NumberOfFeatures": "{:d}", + "NumberOfClasses": "{:d}", + "NumberOfMissingValues": "{:d}", + } + ) + .hide(axis="index") +) + +# Note: Styler.to_latex() doesn't support all CSS styling, but basic formatting works +latex_advanced = styled_table.to_latex( + caption="Dataset Characteristics (Highlighted Missing Values)", + label="tab:cc18_metadata_advanced", + hrules=True, +) + +print("Advanced LaTeX table generated") + +# %% [markdown] +# ## Summary +# +# The `metadata` property makes it easy to: +# 1. Access comprehensive task and dataset information +# 2. Filter and select relevant columns +# 3. Export to LaTeX for academic publications +# 4. Apply custom formatting as needed +# +# This workflow eliminates the need for manual data aggregation and ensures +# consistency across publications using the same benchmark suite. + diff --git a/openml/study/study.py b/openml/study/study.py index 83bbf0497..f04e00cc7 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -4,8 +4,13 @@ from typing import Any, Sequence +import pandas as pd + from openml.base import OpenMLBase from openml.config import get_server_base_url +from openml.datasets.functions import list_datasets +from openml.exceptions import OpenMLServerException +from openml.tasks.functions import _list_tasks class BaseStudy(OpenMLBase): @@ -330,3 +335,118 @@ def __init__( # noqa: PLR0913 runs=None, setups=None, ) + # Initialize metadata cache + self._metadata: pd.DataFrame | None = None + + @property + def metadata(self) -> pd.DataFrame: + """ + Returns a pandas DataFrame containing metadata for all tasks in the suite. + + The DataFrame includes: + - Task-level information: task ID (tid), task type, estimation procedure, + target feature, evaluation measure + - Dataset-level information: dataset ID (did), dataset name, version, + uploader, number of instances, number of features, number of classes, + and other dataset qualities + + The result is cached after the first access. Subsequent calls return the + cached DataFrame without making additional API calls. + + Returns + ------- + pd.DataFrame + A DataFrame with one row per task in the suite. The DataFrame is indexed + by the default integer index. Columns include both task and dataset metadata. + + Raises + ------ + RuntimeError + If task metadata cannot be retrieved from the OpenML server. + + Examples + -------- + >>> import openml + >>> suite = openml.study.get_suite(99) # OpenML-CC18 + >>> meta = suite.metadata + >>> print(meta.columns.tolist()[:5]) # First 5 columns + ['tid', 'did', 'name', 'task_type', 'status'] + + >>> # Export to LaTeX + >>> columns = ['name', 'NumberOfInstances', 'NumberOfFeatures', 'NumberOfClasses'] + >>> latex_table = meta[columns].style.to_latex( + ... caption="Dataset Characteristics", + ... label="tab:suite_metadata" + ... ) + """ + # Return cached result if available + if self._metadata is not None: + return self._metadata + + # Handle empty suites gracefully + if not self.tasks: + self._metadata = pd.DataFrame() + return self._metadata + + # Step 1: Fetch Task Metadata + # Use internal _list_tasks because public API doesn't support task_id filtering + try: + task_df = _list_tasks( + limit=max(len(self.tasks), 1000), + offset=0, + task_id=self.tasks, + ) + + # _list_tasks returns DataFrame with 'tid' as index (from orient="index") + # Reset index to make 'tid' a column for easier merging + if task_df.index.name == "tid": + task_df = task_df.reset_index() + + # Verify we got the expected tasks + if len(task_df) == 0: + # No tasks found - return empty DataFrame + self._metadata = pd.DataFrame() + return self._metadata + + # Ensure 'tid' column exists (should after reset_index if index was named 'tid') + if "tid" not in task_df.columns: + # This shouldn't happen, but handle gracefully + raise RuntimeError( + f"Task metadata missing 'tid' column. Columns: {task_df.columns.tolist()}" + ) + + except OpenMLServerException as e: + raise RuntimeError( + f"Failed to retrieve task metadata for suite {self.id}: {e}" + ) from e + except Exception as e: + raise RuntimeError( + f"Unexpected error retrieving task metadata for suite {self.id}: {e}" + ) from e + + # Step 2: Extract unique dataset IDs and fetch dataset metadata + if "did" in task_df.columns and len(task_df) > 0: + unique_dids = task_df["did"].unique().tolist() + + try: + dataset_df = list_datasets(data_id=unique_dids) + except OpenMLServerException as e: + raise RuntimeError(f"Failed to retrieve dataset metadata: {e}") from e + except Exception as e: + raise RuntimeError(f"Unexpected error retrieving dataset metadata: {e}") from e + + # Step 3: Merge DataFrames + # Use left join to preserve all tasks (one row per task) + # Apply suffixes to handle column name collisions + self._metadata = pd.merge( + task_df, + dataset_df, + on="did", + how="left", + suffixes=("", "_dataset"), + ) + else: + # Fallback: return task DataFrame only if 'did' column is missing + self._metadata = task_df + + return self._metadata diff --git a/tests/test_study/test_benchmark_suite_metadata.py b/tests/test_study/test_benchmark_suite_metadata.py new file mode 100644 index 000000000..dd7dbf202 --- /dev/null +++ b/tests/test_study/test_benchmark_suite_metadata.py @@ -0,0 +1,197 @@ +# License: BSD 3-Clause +from __future__ import annotations + +import unittest +from unittest.mock import patch + +import pandas as pd +import pytest + +from openml.study import OpenMLBenchmarkSuite +from openml.testing import TestBase + + +class TestBenchmarkSuiteMetadata(TestBase): + """Test suite for OpenMLBenchmarkSuite.metadata property.""" + + def setUp(self): + """Create a test suite instance.""" + super().setUp() + self.suite = OpenMLBenchmarkSuite( + suite_id=99, + alias="test-suite", + name="Test Suite", + description="A test suite", + status="active", + creation_date="2022-01-01", + creator=1, + tags=None, + data=None, + tasks=[1, 2, 3], + ) + + @patch("openml.study.study.list_datasets") + @patch("openml.study.study._list_tasks") + def test_metadata_basic_structure(self, mock_list_tasks, mock_list_datasets): + """Test that metadata returns a DataFrame with expected structure.""" + # Mock task response (with tid as index) + task_data = { + 1: {"tid": 1, "did": 10, "name": "Task1", "NumberOfInstances": 100}, + 2: {"tid": 2, "did": 11, "name": "Task2", "NumberOfInstances": 200}, + 3: {"tid": 3, "did": 10, "name": "Task3", "NumberOfInstances": 150}, + } + task_df = pd.DataFrame.from_dict(task_data, orient="index") + task_df.index.name = "tid" + mock_list_tasks.return_value = task_df + + # Mock dataset response + dataset_df = pd.DataFrame( + { + "did": [10, 11], + "version": [1, 1], + "uploader": [5, 5], + "name": ["Dataset1", "Dataset2"], + } + ) + mock_list_datasets.return_value = dataset_df + + # Access property + metadata = self.suite.metadata + + # Assertions + assert isinstance(metadata, pd.DataFrame) + assert len(metadata) == 3 # One row per task + assert "tid" in metadata.columns + assert "did" in metadata.columns + assert "version" in metadata.columns + assert "NumberOfInstances" in metadata.columns + + # Verify API calls + mock_list_tasks.assert_called_once() + mock_list_datasets.assert_called_once() + + @patch("openml.study.study._list_tasks") + def test_metadata_caching(self, mock_list_tasks): + """Test that metadata is cached after first access.""" + task_df = pd.DataFrame( + { + "tid": [1], + "did": [10], + "name": ["Task1"], + } + ) + task_df.index.name = "tid" + mock_list_tasks.return_value = task_df + + # First access + meta1 = self.suite.metadata + # Second access + meta2 = self.suite.metadata + + # Should be same object (cached) + assert meta1 is meta2 + # Should only call API once + assert mock_list_tasks.call_count == 1 + + def test_metadata_empty_suite(self): + """Test metadata for suite with no tasks.""" + empty_suite = OpenMLBenchmarkSuite( + suite_id=1, + alias=None, + name="Empty Suite", + description="", + status="active", + creation_date="2022-01-01", + creator=1, + tags=None, + data=None, + tasks=[], # Empty tasks + ) + + metadata = empty_suite.metadata + assert isinstance(metadata, pd.DataFrame) + assert len(metadata) == 0 + + @patch("openml.study.study.list_datasets") + @patch("openml.study.study._list_tasks") + def test_metadata_merge_behavior(self, mock_list_tasks, mock_list_datasets): + """Test that merge preserves task structure (left join).""" + # Task with dataset that doesn't exist in dataset_df + task_df = pd.DataFrame( + { + "tid": [1, 2], + "did": [10, 99], # did=99 doesn't exist in dataset_df + "name": ["Task1", "Task2"], + } + ) + task_df.index.name = "tid" + mock_list_tasks.return_value = task_df + + dataset_df = pd.DataFrame({"did": [10], "version": [1]}) + mock_list_datasets.return_value = dataset_df + + metadata = self.suite.metadata + + # Should have 2 rows (one per task) + assert len(metadata) == 2 + # Task 1 should have version + assert metadata.loc[metadata["tid"] == 1, "version"].iloc[0] == 1 + # Task 2 should have NaN for version (missing dataset) + assert pd.isna(metadata.loc[metadata["tid"] == 2, "version"].iloc[0]) + + @patch("openml.study.study._list_tasks") + def test_metadata_error_handling(self, mock_list_tasks): + """Test error handling when API calls fail.""" + from openml.exceptions import OpenMLServerException + + mock_list_tasks.side_effect = OpenMLServerException("Server error", code=500) + + with pytest.raises(RuntimeError, match="Failed to retrieve task metadata"): + _ = self.suite.metadata + + @patch("openml.study.study.list_datasets") + @patch("openml.study.study._list_tasks") + def test_metadata_index_reset(self, mock_list_tasks, mock_list_datasets): + """Test that index is properly reset when tid is index.""" + # Create DataFrame with tid as index + task_df = pd.DataFrame( + { + "did": [10, 11], + "name": ["Task1", "Task2"], + "NumberOfInstances": [100, 200], + }, + index=[1, 2], + ) + task_df.index.name = "tid" + mock_list_tasks.return_value = task_df + + dataset_df = pd.DataFrame({"did": [10, 11], "version": [1, 1]}) + mock_list_datasets.return_value = dataset_df + + metadata = self.suite.metadata + + # After reset_index, tid should be a column, not the index + assert "tid" in metadata.columns + assert metadata.index.name is None or isinstance(metadata.index, pd.RangeIndex) + + @patch("openml.study.study.list_datasets") + @patch("openml.study.study._list_tasks") + def test_metadata_no_did_column(self, mock_list_tasks, mock_list_datasets): + """Test fallback when did column is missing.""" + # Task DataFrame without 'did' column (unlikely but test it) + task_df = pd.DataFrame( + { + "tid": [1, 2], + "name": ["Task1", "Task2"], + } + ) + mock_list_tasks.return_value = task_df + + metadata = self.suite.metadata + + # Should return task_df without merging + assert len(metadata) == 2 + assert "did" not in metadata.columns + # Should not call list_datasets + mock_list_datasets.assert_not_called() + From adddce1485b07a609de4801d20f5c3aaa71387bf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Nov 2025 04:47:52 +0000 Subject: [PATCH 2/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- openml/study/study.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/openml/study/study.py b/openml/study/study.py index f04e00cc7..1caecd0c6 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -416,9 +416,7 @@ def metadata(self) -> pd.DataFrame: ) except OpenMLServerException as e: - raise RuntimeError( - f"Failed to retrieve task metadata for suite {self.id}: {e}" - ) from e + raise RuntimeError(f"Failed to retrieve task metadata for suite {self.id}: {e}") from e except Exception as e: raise RuntimeError( f"Unexpected error retrieving task metadata for suite {self.id}: {e}" From c2de5ef919589cb604a40f3ddd88d7291f3b3cac Mon Sep 17 00:00:00 2001 From: Joshua Thomas Johnson Date: Wed, 26 Nov 2025 10:56:49 +0530 Subject: [PATCH 3/6] Fix linting: reduce complexity, use DataFrame.merge() --- openml/study/study.py | 144 +++++++++++++++++++++++++----------------- 1 file changed, 86 insertions(+), 58 deletions(-) diff --git a/openml/study/study.py b/openml/study/study.py index 1caecd0c6..2409bc31a 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -338,6 +338,86 @@ def __init__( # noqa: PLR0913 # Initialize metadata cache self._metadata: pd.DataFrame | None = None + def _fetch_task_metadata(self) -> pd.DataFrame: + """Fetch task metadata for all tasks in the suite. + + Returns + ------- + pd.DataFrame + DataFrame with task metadata, with 'tid' as a column. + + Raises + ------ + RuntimeError + If task metadata cannot be retrieved. + """ + try: + task_df = _list_tasks( + limit=max(len(self.tasks), 1000), + offset=0, + task_id=self.tasks, + ) + + # _list_tasks returns DataFrame with 'tid' as index (from orient="index") + # Reset index to make 'tid' a column for easier merging + if task_df.index.name == "tid": + task_df = task_df.reset_index() + + # Verify we got the expected tasks + if len(task_df) == 0: + return pd.DataFrame() + + # Ensure 'tid' column exists + if "tid" not in task_df.columns: + raise RuntimeError( + f"Task metadata missing 'tid' column. Columns: {task_df.columns.tolist()}" + ) + + return task_df + + except OpenMLServerException as e: + raise RuntimeError(f"Failed to retrieve task metadata for suite {self.id}: {e}") from e + except Exception as e: + raise RuntimeError(f"Unexpected error retrieving task metadata for suite {self.id}: {e}") from e + + def _merge_dataset_metadata(self, task_df: pd.DataFrame) -> pd.DataFrame: + """Merge dataset metadata with task metadata. + + Parameters + ---------- + task_df : pd.DataFrame + DataFrame containing task metadata with 'did' column. + + Returns + ------- + pd.DataFrame + Merged DataFrame with both task and dataset metadata. + + Raises + ------ + RuntimeError + If dataset metadata cannot be retrieved. + """ + if "did" not in task_df.columns or len(task_df) == 0: + return task_df + + unique_dids = task_df["did"].unique().tolist() + + try: + dataset_df = list_datasets(data_id=unique_dids) + except OpenMLServerException as e: + raise RuntimeError(f"Failed to retrieve dataset metadata: {e}") from e + except Exception as e: + raise RuntimeError(f"Unexpected error retrieving dataset metadata: {e}") from e + + # Use DataFrame.merge() method instead of pd.merge() function + return task_df.merge( + dataset_df, + on="did", + how="left", + suffixes=("", "_dataset"), + ) + @property def metadata(self) -> pd.DataFrame: """ @@ -388,63 +468,11 @@ def metadata(self) -> pd.DataFrame: self._metadata = pd.DataFrame() return self._metadata - # Step 1: Fetch Task Metadata - # Use internal _list_tasks because public API doesn't support task_id filtering - try: - task_df = _list_tasks( - limit=max(len(self.tasks), 1000), - offset=0, - task_id=self.tasks, - ) - - # _list_tasks returns DataFrame with 'tid' as index (from orient="index") - # Reset index to make 'tid' a column for easier merging - if task_df.index.name == "tid": - task_df = task_df.reset_index() - - # Verify we got the expected tasks - if len(task_df) == 0: - # No tasks found - return empty DataFrame - self._metadata = pd.DataFrame() - return self._metadata - - # Ensure 'tid' column exists (should after reset_index if index was named 'tid') - if "tid" not in task_df.columns: - # This shouldn't happen, but handle gracefully - raise RuntimeError( - f"Task metadata missing 'tid' column. Columns: {task_df.columns.tolist()}" - ) - - except OpenMLServerException as e: - raise RuntimeError(f"Failed to retrieve task metadata for suite {self.id}: {e}") from e - except Exception as e: - raise RuntimeError( - f"Unexpected error retrieving task metadata for suite {self.id}: {e}" - ) from e - - # Step 2: Extract unique dataset IDs and fetch dataset metadata - if "did" in task_df.columns and len(task_df) > 0: - unique_dids = task_df["did"].unique().tolist() - - try: - dataset_df = list_datasets(data_id=unique_dids) - except OpenMLServerException as e: - raise RuntimeError(f"Failed to retrieve dataset metadata: {e}") from e - except Exception as e: - raise RuntimeError(f"Unexpected error retrieving dataset metadata: {e}") from e - - # Step 3: Merge DataFrames - # Use left join to preserve all tasks (one row per task) - # Apply suffixes to handle column name collisions - self._metadata = pd.merge( - task_df, - dataset_df, - on="did", - how="left", - suffixes=("", "_dataset"), - ) - else: - # Fallback: return task DataFrame only if 'did' column is missing - self._metadata = task_df + # Fetch task metadata and merge with dataset metadata + task_df = self._fetch_task_metadata() + if len(task_df) == 0: + self._metadata = pd.DataFrame() + return self._metadata + self._metadata = self._merge_dataset_metadata(task_df) return self._metadata From fd4980674fd88aeac99e2e9c17e7e119b7b39aab Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Nov 2025 05:27:52 +0000 Subject: [PATCH 4/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- openml/study/study.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/openml/study/study.py b/openml/study/study.py index 2409bc31a..61ed35f9b 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -378,7 +378,9 @@ def _fetch_task_metadata(self) -> pd.DataFrame: except OpenMLServerException as e: raise RuntimeError(f"Failed to retrieve task metadata for suite {self.id}: {e}") from e except Exception as e: - raise RuntimeError(f"Unexpected error retrieving task metadata for suite {self.id}: {e}") from e + raise RuntimeError( + f"Unexpected error retrieving task metadata for suite {self.id}: {e}" + ) from e def _merge_dataset_metadata(self, task_df: pd.DataFrame) -> pd.DataFrame: """Merge dataset metadata with task metadata. From 7e32ea66efc8476b64374a9716a6cca8898da005 Mon Sep 17 00:00:00 2001 From: Joshua Thomas Johnson Date: Wed, 26 Nov 2025 10:58:57 +0530 Subject: [PATCH 5/6] Fix remaining lint errors: line length and type check --- openml/study/study.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/openml/study/study.py b/openml/study/study.py index 2409bc31a..7378d4251 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -352,10 +352,12 @@ def _fetch_task_metadata(self) -> pd.DataFrame: If task metadata cannot be retrieved. """ try: + # self.tasks is guaranteed non-empty here (checked in metadata property) + tasks_list = self.tasks if self.tasks is not None else [] task_df = _list_tasks( - limit=max(len(self.tasks), 1000), + limit=max(len(tasks_list), 1000), offset=0, - task_id=self.tasks, + task_id=tasks_list, ) # _list_tasks returns DataFrame with 'tid' as index (from orient="index") @@ -376,9 +378,11 @@ def _fetch_task_metadata(self) -> pd.DataFrame: return task_df except OpenMLServerException as e: - raise RuntimeError(f"Failed to retrieve task metadata for suite {self.id}: {e}") from e + msg = f"Failed to retrieve task metadata for suite {self.id}: {e}" + raise RuntimeError(msg) from e except Exception as e: - raise RuntimeError(f"Unexpected error retrieving task metadata for suite {self.id}: {e}") from e + msg = f"Unexpected error retrieving task metadata for suite {self.id}: {e}" + raise RuntimeError(msg) from e def _merge_dataset_metadata(self, task_df: pd.DataFrame) -> pd.DataFrame: """Merge dataset metadata with task metadata. From 4024844876e37d93a1b64326f3023e9517b4dd82 Mon Sep 17 00:00:00 2001 From: Joshua Thomas Johnson Date: Wed, 26 Nov 2025 11:15:11 +0530 Subject: [PATCH 6/6] Resolve merge conflict in study.py --- openml/study/study.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/openml/study/study.py b/openml/study/study.py index 870b6208e..7378d4251 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -381,14 +381,8 @@ def _fetch_task_metadata(self) -> pd.DataFrame: msg = f"Failed to retrieve task metadata for suite {self.id}: {e}" raise RuntimeError(msg) from e except Exception as e: -<<<<<<< HEAD msg = f"Unexpected error retrieving task metadata for suite {self.id}: {e}" raise RuntimeError(msg) from e -======= - raise RuntimeError( - f"Unexpected error retrieving task metadata for suite {self.id}: {e}" - ) from e ->>>>>>> fd4980674fd88aeac99e2e9c17e7e119b7b39aab def _merge_dataset_metadata(self, task_df: pd.DataFrame) -> pd.DataFrame: """Merge dataset metadata with task metadata.