diff --git a/openml/utils.py b/openml/utils.py index 7e72e7aee..b19f9e698 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -2,11 +2,23 @@ from __future__ import annotations import contextlib +import re import shutil import warnings +from abc import ABC, abstractmethod from functools import wraps from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, Mapping, Sized, TypeVar, overload +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Iterable, + Mapping, + Sequence, + Sized, + TypeVar, + overload, +) from typing_extensions import Literal, ParamSpec import numpy as np @@ -469,3 +481,57 @@ def update(self, length: int) -> None: self._progress_bar.update(length) if self._progress_bar.total <= self._progress_bar.n: self._progress_bar.close() + + +class ReprMixin(ABC): + """A mixin class that provides a customizable string representation for OpenML objects. + + This mixin standardizes the __repr__ output format across OpenML classes. + Classes inheriting from this mixin should implement the + _get_repr_body_fields method to specify which fields to display. + """ + + def __repr__(self) -> str: + body_fields = self._get_repr_body_fields() + return self._apply_repr_template(body_fields) + + @abstractmethod + def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]: + """Collect all information to display in the __repr__ body. + + Returns + ------- + body_fields : List[Tuple[str, Union[str, int, List[str]]]] + A list of (name, value) pairs to display in the body of the __repr__. + E.g.: [('metric', 'accuracy'), ('dataset', 'iris')] + If value is a List of str, then each item of the list will appear in a separate row. + """ + # Should be implemented in the base class. + + def _apply_repr_template( + self, + body_fields: Iterable[tuple[str, str | int | list[str] | None]], + ) -> str: + """Generates the header and formats the body for string representation of the object. + + Parameters + ---------- + body_fields: List[Tuple[str, str]] + A list of (name, value) pairs to display in the body of the __repr__. + """ + # We add spaces between capitals, e.g. ClassificationTask -> Classification Task + name_with_spaces = re.sub( + r"(\w)([A-Z])", + r"\1 \2", + self.__class__.__name__[len("OpenML") :], + ) + header_text = f"OpenML {name_with_spaces}" + header = f"{header_text}\n{'=' * len(header_text)}\n" + + _body_fields: list[tuple[str, str | int | list[str]]] = [ + (k, "None" if v is None else v) for k, v in body_fields + ] + longest_field_name_length = max(len(name) for name, _ in _body_fields) + field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}" + body = "\n".join(field_line_format.format(name, value) for name, value in _body_fields) + return header + body