Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions examples/Basics/simple_flows_and_runs_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@
clf = KNeighborsClassifier(**knn_parameters)
clf.fit(X_train, y_train)

# Option A: auto-publish the estimator via unified helper (requires openml-sklearn extension).
try:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using try/except for case distinction is not good: it makes it impossible to distinguish the intended case from a genuine failure in the "try" branch.

Plus, in the examples, there should be no try/except.

flow_id = openml.publish(clf)
print(f"Auto-published flow id: {flow_id}")
except Exception as ex: # pragma: no cover - example path
print(f"Auto-publish failed (is openml-sklearn installed?): {ex}")

# Get experiment results
y_pred = clf.predict(X_test)
y_pred_proba = clf.predict_proba(X_test)
Expand All @@ -57,6 +64,7 @@
# First, create a flow and fill it with metadata about the machine learning model.

# %%
# Option B: manually build the flow
knn_flow = openml.flows.OpenMLFlow(
# Metadata
model=clf, # or None, if you do not want to upload the model object.
Expand Down
35 changes: 35 additions & 0 deletions openml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
# License: BSD 3-Clause
from __future__ import annotations

from typing import Any, Sequence

from . import (
_api_calls,
config,
Expand All @@ -33,6 +35,7 @@
utils,
)
from .__version__ import __version__
from .base import OpenMLBase
from .datasets import OpenMLDataFeature, OpenMLDataset
from .evaluations import OpenMLEvaluation
from .flows import OpenMLFlow
Expand All @@ -50,6 +53,37 @@
)


def _merge_tags(existing: Sequence[str] | None, extra: Sequence[str]) -> list[str]:
    """Return ``existing`` followed by ``extra``, dropping duplicates (first occurrence wins)."""
    # ``dict.fromkeys`` keeps insertion order, so this is an order-preserving de-duplication.
    return list(dict.fromkeys([*(existing or []), *extra]))


def publish(obj: Any, *, name: str | None = None, tags: Sequence[str] | None = None) -> Any:
    """Publish an OpenML object, or a model that an extension can convert to a flow.

    Two kinds of input are handled:

    * An instance of ``OpenMLBase``: ``name`` and ``tags`` are applied to it (see below)
      and its own ``publish`` method is called.
    * Any other object: an extension is looked up with
      ``extensions.functions.get_extension_by_model``, the object is converted with the
      extension's ``model_to_flow`` and the resulting flow is published.

    Parameters
    ----------
    obj : Any
        The object to publish.
    name : str, optional
        If given, replaces the ``name`` of the published object. For ``OpenMLBase``
        instances this is only applied when the object has a ``name`` attribute.
    tags : sequence of str or str, optional
        Tags to add. They are appended to the tags already present; duplicates are
        removed and the original order is kept. A single string is treated as one tag.
        For ``OpenMLBase`` instances this is only applied when the object has a
        ``tags`` attribute.

    Returns
    -------
    Any
        Whatever the ``publish`` method of the object (or of the converted flow) returns.

    Raises
    ------
    ValueError
        If the extension lookup returns ``None``. The lookup is called with
        ``raise_if_no_extension=True`` and is expected to raise on its own when no
        extension matches, so this is only a safeguard.
    """
    # A bare string satisfies ``Sequence[str]`` but would be splatted into single
    # characters when merged; interpret it as one tag instead.
    if isinstance(tags, str):
        tags = [tags]

    if isinstance(obj, OpenMLBase):
        # Not every OpenML object is guaranteed to carry ``tags``/``name``; skip silently
        # rather than attaching attributes the object does not define.
        if tags is not None and hasattr(obj, "tags"):
            obj.tags = _merge_tags(obj.tags, tags)
        if name is not None and hasattr(obj, "name"):
            obj.name = name
        return obj.publish()

    extension = extensions.functions.get_extension_by_model(obj, raise_if_no_extension=True)
    if extension is None:  # defensive; should not happen with raise_if_no_extension=True
        raise ValueError("No extension registered to handle the provided object.")
    flow = extension.model_to_flow(obj)

    if name is not None:
        flow.name = name
    if tags is not None:
        flow.tags = _merge_tags(getattr(flow, "tags", None), tags)

    return flow.publish()


def populate_cache(
task_ids: list[int] | None = None,
dataset_ids: list[int | str] | None = None,
Expand Down Expand Up @@ -91,6 +125,7 @@ def populate_cache(


__all__ = [
"publish",
"OpenMLDataset",
"OpenMLDataFeature",
"OpenMLRun",
Expand Down
51 changes: 51 additions & 0 deletions tests/test_openml/test_openml.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,54 @@ def test_populate_cache(
assert task_mock.call_count == 2
for argument, fixture in zip(task_mock.call_args_list, [(1,), (2,)]):
assert argument[0] == fixture

def test_publish_with_openml_object_merges_tags_and_name(self):
    """``openml.publish`` on an ``OpenMLBase`` instance sets name/tags, then publishes it."""

    class _StubEntity(openml.base.OpenMLBase):
        """Minimal ``OpenMLBase`` subclass that records whether ``publish`` was called."""

        def __init__(self) -> None:
            self.published = False
            self.name = "orig"
            self.tags = ["a"]

        def publish(self):
            # Return the instance itself so the caller can check delegation by identity.
            self.published = True
            return self

        @property
        def id(self):
            return None

        def _to_dict(self):
            return {}

        def _get_repr_body_fields(self):
            return []

        def _parse_publish_response(self, xml_response):
            return None

    entity = _StubEntity()
    returned = openml.publish(entity, name="new", tags=["b", "a"])

    assert returned is entity
    assert entity.published is True
    assert entity.name == "new"
    # "a" was already present, so only "b" is appended behind it.
    assert entity.tags == ["a", "b"]

@mock.patch("openml.extensions.functions.get_extension_by_model")
def test_publish_with_extension(self, get_ext_mock):
    """``openml.publish`` on a non-OpenML object converts it via the extension and publishes."""
    # Flow returned by the fake extension; starts without tags.
    fake_flow = mock.MagicMock(tags=[])
    fake_flow.publish.return_value = "flow-id"

    # Extension whose conversion always yields the fake flow.
    fake_extension = mock.MagicMock()
    fake_extension.model_to_flow.return_value = fake_flow
    get_ext_mock.return_value = fake_extension

    estimator = object()
    published_id = openml.publish(estimator, name="n", tags=["x"])

    # Result is passed through from the flow's publish().
    assert published_id == "flow-id"
    fake_flow.publish.assert_called_once_with()

    # Lookup and conversion were each done exactly once with the given object.
    get_ext_mock.assert_called_once_with(estimator, raise_if_no_extension=True)
    fake_extension.model_to_flow.assert_called_once_with(estimator)

    # Overrides were applied to the flow before publishing.
    assert fake_flow.name == "n"
    assert fake_flow.tags == ["x"]