Skip to content
Open
38 changes: 3 additions & 35 deletions openml/base.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,22 @@
# License: BSD 3-Clause
from __future__ import annotations

import re
import webbrowser
from abc import ABC, abstractmethod
from typing import Iterable, Sequence
from typing import Sequence

import xmltodict

import openml._api_calls
import openml.config
from openml.utils import ReprMixin

from .utils import _get_rest_api_type_alias, _tag_openml_base


class OpenMLBase(ABC):
class OpenMLBase(ReprMixin, ABC):
"""Base object for functionality that is shared across entities."""

def __repr__(self) -> str:
    """Return the templated text representation built from this object's body fields."""
    # The subclass supplies the (name, value) pairs; the template adds header and layout.
    return self._apply_repr_template(self._get_repr_body_fields())

@property
@abstractmethod
def id(self) -> int | None:
Expand Down Expand Up @@ -60,34 +56,6 @@ def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | N
"""
# Should be implemented in the base class.

def _apply_repr_template(
self,
body_fields: Iterable[tuple[str, str | int | list[str] | None]],
) -> str:
"""Generates the header and formats the body for string representation of the object.

Parameters
----------
body_fields: List[Tuple[str, str]]
A list of (name, value) pairs to display in the body of the __repr__.
"""
# We add spaces between capitals, e.g. ClassificationTask -> Classification Task
name_with_spaces = re.sub(
r"(\w)([A-Z])",
r"\1 \2",
self.__class__.__name__[len("OpenML") :],
)
header_text = f"OpenML {name_with_spaces}"
header = f"{header_text}\n{'=' * len(header_text)}\n"

_body_fields: list[tuple[str, str | int | list[str]]] = [
(k, "None" if v is None else v) for k, v in body_fields
]
longest_field_name_length = max(len(name) for name, _ in _body_fields)
field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
body = "\n".join(field_line_format.format(name, value) for name, value in _body_fields)
return header + body

@abstractmethod
def _to_dict(self) -> dict[str, dict]:
"""Creates a dictionary representation of self.
Expand Down
20 changes: 17 additions & 3 deletions openml/datasets/data_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
if TYPE_CHECKING:
from IPython.lib import pretty

from openml.utils import ReprMixin

class OpenMLDataFeature:

class OpenMLDataFeature(ReprMixin):
"""
Data Feature (a.k.a. Attribute) object.

Expand Down Expand Up @@ -74,8 +76,20 @@ def __init__( # noqa: PLR0913
self.number_missing_values = number_missing_values
self.ontologies = ontologies

def __repr__(self) -> str:
    """Return a compact one-line summary of the form ``[index - name (data_type)]``."""
    summary_parts = (self.index, self.name, self.data_type)
    return "[%d - %s (%s)]" % summary_parts
def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
"""Collect all information to display in the __repr__ body."""
fields: dict[str, int | str | None] = {
"Index": self.index,
"Name": self.name,
"Data Type": self.data_type,
}

order = [
"Index",
"Name",
"Data Type",
]
return [(key, fields[key]) for key in order if key in fields]

def __eq__(self, other: Any) -> bool:
    """Compare by type and by full attribute dictionary.

    Returns False for anything that is not an OpenMLDataFeature.
    """
    if not isinstance(other, OpenMLDataFeature):
        return False
    return self.__dict__ == other.__dict__
Expand Down
41 changes: 13 additions & 28 deletions openml/setups/setup.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# License: BSD 3-Clause
from __future__ import annotations

from typing import Any
from typing import Any, Sequence

import openml.config
import openml.flows
from openml.utils import ReprMixin


class OpenMLSetup:
class OpenMLSetup(ReprMixin):
"""Setup object (a.k.a. Configuration).

Parameters
Expand Down Expand Up @@ -43,30 +44,21 @@ def _to_dict(self) -> dict[str, Any]:
else None,
}

def __repr__(self) -> str:
header = "OpenML Setup"
header = f"{header}\n{'=' * len(header)}\n"

fields = {
def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
"""Collect all information to display in the __repr__ body."""
fields: dict[str, int | str | None] = {
"Setup ID": self.setup_id,
"Flow ID": self.flow_id,
"Flow URL": openml.flows.OpenMLFlow.url_for_id(self.flow_id),
"# of Parameters": (
len(self.parameters) if self.parameters is not None else float("nan")
),
"# of Parameters": (len(self.parameters) if self.parameters is not None else "nan"),
}

# determines the order in which the information will be printed
order = ["Setup ID", "Flow ID", "Flow URL", "# of Parameters"]
_fields = [(key, fields[key]) for key in order if key in fields]

longest_field_name_length = max(len(name) for name, _ in _fields)
field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
body = "\n".join(field_line_format.format(name, value) for name, value in _fields)
return header + body
return [(key, fields[key]) for key in order if key in fields]


class OpenMLParameter:
class OpenMLParameter(ReprMixin):
"""Parameter object (used in setup).

Parameters
Expand Down Expand Up @@ -123,11 +115,9 @@ def _to_dict(self) -> dict[str, Any]:
"value": self.value,
}

def __repr__(self) -> str:
header = "OpenML Parameter"
header = f"{header}\n{'=' * len(header)}\n"

fields = {
def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
"""Collect all information to display in the __repr__ body."""
fields: dict[str, int | str | None] = {
"ID": self.id,
"Flow ID": self.flow_id,
# "Flow Name": self.flow_name,
Expand Down Expand Up @@ -156,9 +146,4 @@ def __repr__(self) -> str:
parameter_default,
parameter_value,
]
_fields = [(key, fields[key]) for key in order if key in fields]

longest_field_name_length = max(len(name) for name, _ in _fields)
field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
body = "\n".join(field_line_format.format(name, value) for name, value in _fields)
return header + body
return [(key, fields[key]) for key in order if key in fields]
22 changes: 20 additions & 2 deletions openml/tasks/split.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
import pickle
from collections import OrderedDict
from pathlib import Path
from typing import Any
from typing import Any, Sequence
from typing_extensions import NamedTuple

import arff # type: ignore
import numpy as np

from openml.utils import ReprMixin


class Split(NamedTuple):
"""A single split of a dataset."""
Expand All @@ -18,7 +20,7 @@ class Split(NamedTuple):
test: np.ndarray


class OpenMLSplit:
class OpenMLSplit(ReprMixin):
"""OpenML Split object.

This class manages train-test splits for a dataset across multiple
Expand Down Expand Up @@ -63,6 +65,22 @@ def __init__(
self.folds = len(self.split[0])
self.samples = len(self.split[0][0])

def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
"""Collect all information to display in the __repr__ body."""
fields = {
"Name": self.name,
"Description": (
self.description if len(self.description) <= 80 else self.description[:77] + "..."
),
"Repeats": self.repeats,
"Folds": self.folds,
"Samples": self.samples,
}

order = ["Name", "Description", "Repeats", "Folds", "Samples"]

return [(key, fields[key]) for key in order if key in fields]

def __eq__(self, other: Any) -> bool:
if (
(not isinstance(self, type(other)))
Expand Down
68 changes: 67 additions & 1 deletion openml/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,23 @@
from __future__ import annotations

import contextlib
import re
import shutil
import warnings
from abc import ABC, abstractmethod
from functools import wraps
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Mapping, Sized, TypeVar, overload
from typing import (
TYPE_CHECKING,
Any,
Callable,
Iterable,
Mapping,
Sequence,
Sized,
TypeVar,
overload,
)
from typing_extensions import Literal, ParamSpec

import numpy as np
Expand Down Expand Up @@ -469,3 +481,57 @@ def update(self, length: int) -> None:
self._progress_bar.update(length)
if self._progress_bar.total <= self._progress_bar.n:
self._progress_bar.close()


class ReprMixin(ABC):
"""A mixin class that provides a customizable string representation for OpenML objects.

This mixin standardizes the __repr__ output format across OpenML classes.
Classes inheriting from this mixin should implement the
_get_repr_body_fields method to specify which fields to display.
"""

def __repr__(self) -> str:
body_fields = self._get_repr_body_fields()
return self._apply_repr_template(body_fields)

@abstractmethod
def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
"""Collect all information to display in the __repr__ body.

Returns
-------
body_fields : List[Tuple[str, Union[str, int, List[str]]]]
A list of (name, value) pairs to display in the body of the __repr__.
E.g.: [('metric', 'accuracy'), ('dataset', 'iris')]
If value is a List of str, then each item of the list will appear in a separate row.
"""
# Should be implemented in the base class.

def _apply_repr_template(
self,
body_fields: Iterable[tuple[str, str | int | list[str] | None]],
) -> str:
"""Generates the header and formats the body for string representation of the object.

Parameters
----------
body_fields: List[Tuple[str, str]]
A list of (name, value) pairs to display in the body of the __repr__.
"""
# We add spaces between capitals, e.g. ClassificationTask -> Classification Task
name_with_spaces = re.sub(
r"(\w)([A-Z])",
r"\1 \2",
self.__class__.__name__[len("OpenML") :],
)
header_text = f"OpenML {name_with_spaces}"
header = f"{header_text}\n{'=' * len(header_text)}\n"

_body_fields: list[tuple[str, str | int | list[str]]] = [
(k, "None" if v is None else v) for k, v in body_fields
]
longest_field_name_length = max(len(name) for name, _ in _body_fields)
field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
body = "\n".join(field_line_format.format(name, value) for name, value in _body_fields)
return header + body