From 0f216401c32a5a8a1091373122426259d206c0c0 Mon Sep 17 00:00:00 2001
From: Omswastik-11 <omswastikpanda11@gmail.com>
Date: Wed, 24 Dec 2025 15:55:36 +0530
Subject: [PATCH 1/4] improve publish api for users

---
 .../Basics/simple_flows_and_runs_tutorial.py  |  8 +++
 openml/__init__.py                            | 35 +++++++++++++
 tests/test_openml/test_openml.py              | 51 +++++++++++++++++++
 3 files changed, 94 insertions(+)

diff --git a/examples/Basics/simple_flows_and_runs_tutorial.py b/examples/Basics/simple_flows_and_runs_tutorial.py
index 41eed9234..f5c165214 100644
--- a/examples/Basics/simple_flows_and_runs_tutorial.py
+++ b/examples/Basics/simple_flows_and_runs_tutorial.py
@@ -48,6 +48,13 @@
 clf = KNeighborsClassifier(**knn_parameters)
 clf.fit(X_train, y_train)
 
+# Option A: auto-publish the estimator via unified helper (requires openml-sklearn extension).
+try:
+    flow_id = openml.publish(clf)
+    print(f"Auto-published flow id: {flow_id}")
+except Exception as ex:  # pragma: no cover - example path
+    print(f"Auto-publish failed (is openml-sklearn installed?): {ex}")
+
 # Get experiment results
 y_pred = clf.predict(X_test)
 y_pred_proba = clf.predict_proba(X_test)
@@ -57,6 +64,7 @@
 # First, create a fow and fill it with metadata about the machine learning model.
 
 # %%
+# Option B: manually build the flow
 knn_flow = openml.flows.OpenMLFlow(
     # Metadata
     model=clf,  # or None, if you do not want to upload the model object.
diff --git a/openml/__init__.py b/openml/__init__.py
index c49505eb9..d691bd22b 100644
--- a/openml/__init__.py
+++ b/openml/__init__.py
@@ -18,6 +18,8 @@
 # License: BSD 3-Clause
 from __future__ import annotations
 
+from typing import Any, Sequence
+
 from . import (
     _api_calls,
     config,
@@ -33,6 +35,7 @@
     utils,
 )
 from .__version__ import __version__
+from .base import OpenMLBase
 from .datasets import OpenMLDataFeature, OpenMLDataset
 from .evaluations import OpenMLEvaluation
 from .flows import OpenMLFlow
@@ -50,6 +53,37 @@
 )
 
 
+def publish(obj: Any, *, name: str | None = None, tags: Sequence[str] | None = None) -> Any:
+    """Publish a common object (flow/model/run/dataset) with minimal friction.
+
+    If ``obj`` is already an OpenML object (``OpenMLBase``) it will call its ``publish`` method.
+    Otherwise it looks for a registered extension (e.g., scikit-learn) to convert the object
+    into an ``OpenMLFlow`` and publish it.
+    """
+    if isinstance(obj, OpenMLBase):
+        if tags is not None and hasattr(obj, "tags"):
+            existing = list(getattr(obj, "tags", []) or [])
+            merged = list(dict.fromkeys([*existing, *tags]))
+            obj.tags = merged
+        if name is not None and hasattr(obj, "name"):
+            obj.name = name
+        return obj.publish()
+
+    extension = extensions.functions.get_extension_by_model(obj, raise_if_no_extension=True)
+    if extension is None:  # defensive; should not happen with raise_if_no_extension=True
+        raise ValueError("No extension registered to handle the provided object.")
+    flow = extension.model_to_flow(obj)
+
+    if name is not None:
+        flow.name = name
+
+    if tags is not None:
+        existing_tags = list(getattr(flow, "tags", []) or [])
+        flow.tags = list(dict.fromkeys([*existing_tags, *tags]))
+
+    return flow.publish()
+
+
 def populate_cache(
     task_ids: list[int] | None = None,
     dataset_ids: list[int | str] | None = None,
@@ -91,6 +125,7 @@ def populate_cache(
 
 
 __all__ = [
+    "publish",
     "OpenMLDataset",
     "OpenMLDataFeature",
     "OpenMLRun",
diff --git a/tests/test_openml/test_openml.py b/tests/test_openml/test_openml.py
index 998046726..28e6c4e1c 100644
--- a/tests/test_openml/test_openml.py
+++ b/tests/test_openml/test_openml.py
@@ -41,3 +41,54 @@ def test_populate_cache(
         assert task_mock.call_count == 2
         for argument, fixture in zip(task_mock.call_args_list, [(1,), (2,)]):
             assert argument[0] == fixture
+
+    def test_publish_with_openml_object_merges_tags_and_name(self):
+        class Dummy(openml.base.OpenMLBase):
+            def __init__(self) -> None:
+                self.tags = ["a"]
+                self.name = "orig"
+                self.published = False
+
+            @property
+            def id(self):
+                return None
+
+            def _get_repr_body_fields(self):
+                return []
+
+            def _to_dict(self):
+                return {}
+
+            def _parse_publish_response(self, xml_response):
+                return None
+
+            def publish(self):
+                self.published = True
+                return self
+
+        obj = Dummy()
+        result = openml.publish(obj, name="new", tags=["b", "a"])
+        assert result is obj
+        assert obj.published is True
+        assert obj.name == "new"
+        assert obj.tags == ["a", "b"]  # dedup and preserve order from original
+
+    @mock.patch("openml.extensions.functions.get_extension_by_model")
+    def test_publish_with_extension(self, get_ext_mock):
+        flow_mock = mock.MagicMock()
+        flow_mock.tags = []
+        flow_mock.publish.return_value = "flow-id"
+
+        ext_instance = mock.MagicMock()
+        ext_instance.model_to_flow.return_value = flow_mock
+        get_ext_mock.return_value = ext_instance
+
+        model = object()
+        flow_id = openml.publish(model, name="n", tags=["x"])
+
+        get_ext_mock.assert_called_once_with(model, raise_if_no_extension=True)
+        ext_instance.model_to_flow.assert_called_once_with(model)
+        assert flow_mock.name == "n"
+        assert flow_mock.tags == ["x"]
+        flow_mock.publish.assert_called_once_with()
+        assert flow_id == "flow-id"

From 3b1d9616981291d0c0fc9f6896e8996e98fc73fe Mon Sep 17 00:00:00 2001
From: Omswastik-11 <omswastikpanda11@gmail.com>
Date: Thu, 25 Dec 2025 13:20:24 +0530
Subject: [PATCH 2/4] Expand publish() docstring, auto-import openml-sklearn, rework tutorial

---
 .../Basics/simple_flows_and_runs_tutorial.py  |  62 +++++----
 openml/__init__.py                            | 128 +++++++++++++++---
 2 files changed, 140 insertions(+), 50 deletions(-)

diff --git a/examples/Basics/simple_flows_and_runs_tutorial.py b/examples/Basics/simple_flows_and_runs_tutorial.py
index f5c165214..05aea7a02 100644
--- a/examples/Basics/simple_flows_and_runs_tutorial.py
+++ b/examples/Basics/simple_flows_and_runs_tutorial.py
@@ -2,7 +2,6 @@
 # A simple tutorial on how to upload results from a machine learning experiment to OpenML.
 
 # %%
-import sklearn
 from sklearn.neighbors import KNeighborsClassifier
 
 import openml
@@ -48,42 +47,47 @@
 clf = KNeighborsClassifier(**knn_parameters)
 clf.fit(X_train, y_train)
 
-# Option A: auto-publish the estimator via unified helper (requires openml-sklearn extension).
-try:
-    flow_id = openml.publish(clf)
-    print(f"Auto-published flow id: {flow_id}")
-except Exception as ex:  # pragma: no cover - example path
-    print(f"Auto-publish failed (is openml-sklearn installed?): {ex}")
-
 # Get experiment results
 y_pred = clf.predict(X_test)
 y_pred_proba = clf.predict_proba(X_test)
 
 # %% [markdown]
 # ## Upload the machine learning experiments to OpenML
-# First, create a fow and fill it with metadata about the machine learning model.
+#
+# ### Option A: Automatic publishing (simplified)
+# `openml.publish` detects the model type and creates the flow (requires `openml-sklearn`):
 
 # %%
-# Option B: manually build the flow
-knn_flow = openml.flows.OpenMLFlow(
-    # Metadata
-    model=clf,  # or None, if you do not want to upload the model object.
-    name="CustomKNeighborsClassifier",
-    description="A custom KNeighborsClassifier flow for OpenML.",
-    external_version=f"{sklearn.__version__}",
-    language="English",
-    tags=["openml_tutorial_knn"],
-    dependencies=f"{sklearn.__version__}",
-    # Hyperparameters
-    parameters={k: str(v) for k, v in knn_parameters.items()},
-    parameters_meta_info={
-        "n_neighbors": {"description": "number of neighbors to use", "data_type": "int"}
-    },
-    # If you have a pipeline with subcomponents, such as preprocessing, add them here.
-    components={},
-)
-knn_flow.publish()
-print(f"knn_flow was published with the ID {knn_flow.flow_id}")
+knn_flow = openml.publish(clf, tags=["openml_tutorial_knn"])
+print(f"Flow was auto-published with ID {knn_flow.flow_id}")
+
+# %% [markdown]
+# ### Option B: Manual flow construction (full control)
+# For advanced use cases, you can manually construct the flow:
+
+# %%
+# Uncomment to use manual flow construction:
+# knn_flow_manual = openml.flows.OpenMLFlow(
+#     name="sklearn.neighbors.classification.KNeighborsClassifier(my_name)",
+#     class_name="sklearn.neighbors.classification.KNeighborsClassifier",
+#     description="KNeighborsClassifier(algorithm='brute', leaf_size=30, \n"
+#     "metric='minkowski', metric_params=None, n_jobs=-1, \n"
+#     "n_neighbors=5, p=2, weights='uniform')",
+#     model=clf,
+#     components=OrderedDict(),
+#     parameters=OrderedDict(),
+#     parameters_meta_dict=OrderedDict(),
+#     external_version="0.20.0",
+#     tags=["openml_tutorial_knn"],
+#     language="English",
+#     dependencies="sklearn==0.20.0\nnumpy>=1.6.1\nscipy>=0.9",
+# )
+# knn_flow_manual.extension = extension
+# knn_flow_manual = knn_flow_manual.publish(raise_error_if_exists=True)
+# print(f"Manual flow URL: {knn_flow_manual.openml_url}")
+
+# %% [markdown]
+# Now we'll use the auto-published flow to create and upload a run.
 
 # %% [markdown]
 # Second, we create a run to store the results associated with the flow.
diff --git a/openml/__init__.py b/openml/__init__.py
index d691bd22b..4d1b0bcd5 100644
--- a/openml/__init__.py
+++ b/openml/__init__.py
@@ -18,6 +18,7 @@
 # License: BSD 3-Clause
 from __future__ import annotations
 
+import contextlib
 from typing import Any, Sequence
 
 from . import (
@@ -56,10 +57,72 @@
 def publish(obj: Any, *, name: str | None = None, tags: Sequence[str] | None = None) -> Any:
     """Publish a common object (flow/model/run/dataset) with minimal friction.
 
-    If ``obj`` is already an OpenML object (``OpenMLBase``) it will call its ``publish`` method.
-    Otherwise it looks for a registered extension (e.g., scikit-learn) to convert the object
-    into an ``OpenMLFlow`` and publish it.
+    This function provides a unified entry point for publishing various OpenML objects.
+    It automatically detects the object type and routes to the appropriate publishing
+    mechanism:
+
+    - For OpenML objects (``OpenMLDataset``, ``OpenMLFlow``, ``OpenMLRun``, etc.),
+      it directly calls their ``publish()`` method.
+    - For external models (e.g., scikit-learn estimators), it uses registered
+      extensions to convert them to ``OpenMLFlow`` objects before publishing.
+
+    Parameters
+    ----------
+    obj : Any
+        The object to publish. Can be:
+        - An OpenML object (OpenMLDataset, OpenMLFlow, OpenMLRun, OpenMLTask)
+        - A machine learning model from a supported framework (e.g., scikit-learn)
+    name : str, optional
+        Override the default name for the published object; ignored for OpenML objects
+        without a ``name`` attribute. If not provided, the object's own name is kept.
+    tags : Sequence[str], optional
+        Additional tags to attach to the published object (ignored for OpenML objects
+        without a ``tags`` attribute). Merged with any existing tags, removing
+        duplicates while preserving order.
+
+    Returns
+    -------
+    Any
+        The published object (typically with updated ID and metadata).
+
+    Raises
+    ------
+    ValueError
+        If no extension is registered to handle the provided model type.
+
+    Examples
+    --------
+    Publishing an OpenML dataset:
+
+    >>> dataset = openml.datasets.get_dataset(61)
+    >>> openml.publish(dataset, tags=["example"])
+
+    Publishing a scikit-learn model:
+
+    >>> from sklearn.tree import DecisionTreeClassifier
+    >>> clf = DecisionTreeClassifier(max_depth=5)
+    >>> openml.publish(clf, name="MyDecisionTree", tags=["tutorial"])
+
+    Publishing an OpenML flow directly:
+
+    >>> flow = openml.flows.OpenMLFlow(...)
+    >>> openml.publish(flow)
+
+    Publishing an OpenML run (after execution with predictions):
+
+    >>> run = openml.runs.OpenMLRun(
+    ...     task_id=1, flow_id=100, dataset_id=61,
+    ...     data_content=predictions  # predictions from model evaluation
+    ... )
+    >>> openml.publish(run, tags=["experiment"])
+
+    Notes
+    -----
+    For external models (e.g., scikit-learn), the corresponding extension must be
+    installed (e.g., ``openml-sklearn``). Only ``openml_sklearn`` is imported automatically
+    (when installed, for scikit-learn models); other extensions must already be registered.
     """
+    # Case 1: Object is already an OpenML entity
     if isinstance(obj, OpenMLBase):
         if tags is not None and hasattr(obj, "tags"):
             existing = list(getattr(obj, "tags", []) or [])
@@ -69,8 +132,12 @@ def publish(obj: Any, *, name: str | None = None, tags: Sequence[str] | None = N
             obj.name = name
         return obj.publish()
 
+    # Case 2: Object is an external model - use extension registry
+    # Attempt to auto-import common extensions
+    _ensure_extension_imported(obj)
+
     extension = extensions.functions.get_extension_by_model(obj, raise_if_no_extension=True)
-    if extension is None:  # defensive; should not happen with raise_if_no_extension=True
+    if extension is None:  # Defensive check (should not occur with raise_if_no_extension=True)
         raise ValueError("No extension registered to handle the provided object.")
     flow = extension.model_to_flow(obj)
 
@@ -84,6 +151,25 @@ def publish(obj: Any, *, name: str | None = None, tags: Sequence[str] | None = N
     return flow.publish()
 
 
+def _ensure_extension_imported(obj: Any) -> None:
+    """Attempt to import the appropriate extension for common frameworks.
+
+    Convenience helper that auto-imports extensions for well-known frameworks, reducing
+    friction for users. Only ``sklearn`` is handled; a failed import is silently ignored.
+
+    Parameters
+    ----------
+    obj : Any
+        The model whose class module (``type(obj).__module__``) selects the extension.
+    """
+    obj_module = type(obj).__module__
+
+    # Check for scikit-learn models
+    if obj_module.startswith("sklearn"):
+        with contextlib.suppress(ImportError):
+            import openml_sklearn  # noqa: F401
+
+
 def populate_cache(
     task_ids: list[int] | None = None,
     dataset_ids: list[int | str] | None = None,
@@ -125,34 +211,34 @@ def populate_cache(
 
 
 __all__ = [
-    "publish",
-    "OpenMLDataset",
+    "OpenMLBenchmarkSuite",
+    "OpenMLClassificationTask",
+    "OpenMLClusteringTask",
     "OpenMLDataFeature",
-    "OpenMLRun",
-    "OpenMLSplit",
+    "OpenMLDataset",
     "OpenMLEvaluation",
-    "OpenMLSetup",
-    "OpenMLParameter",
-    "OpenMLTask",
-    "OpenMLSupervisedTask",
-    "OpenMLClusteringTask",
+    "OpenMLFlow",
     "OpenMLLearningCurveTask",
+    "OpenMLParameter",
     "OpenMLRegressionTask",
-    "OpenMLClassificationTask",
-    "OpenMLFlow",
+    "OpenMLRun",
+    "OpenMLSetup",
+    "OpenMLSplit",
     "OpenMLStudy",
-    "OpenMLBenchmarkSuite",
+    "OpenMLSupervisedTask",
+    "OpenMLTask",
+    "__version__",
+    "_api_calls",
+    "config",
     "datasets",
     "evaluations",
     "exceptions",
     "extensions",
-    "config",
-    "runs",
     "flows",
-    "tasks",
+    "publish",
+    "runs",
     "setups",
     "study",
+    "tasks",
     "utils",
-    "_api_calls",
-    "__version__",
 ]

From 3dfe34a6802a9965f8c9e3a1eb86759893349984 Mon Sep 17 00:00:00 2001
From: Omswastik-11 <omswastikpanda11@gmail.com>
Date: Thu, 25 Dec 2025 13:29:23 +0530
Subject: [PATCH 3/4] Restore original __all__ order in openml/__init__.py

---
 openml/__init__.py | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/openml/__init__.py b/openml/__init__.py
index 4d1b0bcd5..0ff233394 100644
--- a/openml/__init__.py
+++ b/openml/__init__.py
@@ -211,34 +211,34 @@ def populate_cache(
 
 
 __all__ = [
-    "OpenMLBenchmarkSuite",
-    "OpenMLClassificationTask",
-    "OpenMLClusteringTask",
-    "OpenMLDataFeature",
     "OpenMLDataset",
+    "OpenMLDataFeature",
+    "OpenMLRun",
+    "OpenMLSplit",
     "OpenMLEvaluation",
-    "OpenMLFlow",
-    "OpenMLLearningCurveTask",
+    "OpenMLSetup",
     "OpenMLParameter",
+    "OpenMLTask",
+    "OpenMLSupervisedTask",
+    "OpenMLClusteringTask",
+    "OpenMLLearningCurveTask",
     "OpenMLRegressionTask",
-    "OpenMLRun",
-    "OpenMLSetup",
-    "OpenMLSplit",
+    "OpenMLClassificationTask",
+    "OpenMLFlow",
     "OpenMLStudy",
-    "OpenMLSupervisedTask",
-    "OpenMLTask",
-    "__version__",
-    "_api_calls",
-    "config",
+    "OpenMLBenchmarkSuite",
     "datasets",
     "evaluations",
     "exceptions",
     "extensions",
-    "flows",
-    "publish",
+    "config",
     "runs",
+    "flows",
+    "tasks",
     "setups",
     "study",
-    "tasks",
     "utils",
+    "_api_calls",
+    "__version__",
+    "publish",
 ]

From db367783ec28085f6277c13b9a8ab287ff9d3438 Mon Sep 17 00:00:00 2001
From: Omswastik-11 <omswastikpanda11@gmail.com>
Date: Thu, 25 Dec 2025 13:34:46 +0530
Subject: [PATCH 4/4] Restore manual flow construction in flows-and-runs tutorial

---
 .../Basics/simple_flows_and_runs_tutorial.py  | 40 +++++++++----------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/examples/Basics/simple_flows_and_runs_tutorial.py b/examples/Basics/simple_flows_and_runs_tutorial.py
index 05aea7a02..050f3353a 100644
--- a/examples/Basics/simple_flows_and_runs_tutorial.py
+++ b/examples/Basics/simple_flows_and_runs_tutorial.py
@@ -3,7 +3,7 @@
 
 # %%
 from sklearn.neighbors import KNeighborsClassifier
-
+import sklearn
 import openml
 
 # %% [markdown]
@@ -66,25 +66,25 @@
 # For advanced use cases, you can manually construct the flow:
 
 # %%
-# Uncomment to use manual flow construction:
-# knn_flow_manual = openml.flows.OpenMLFlow(
-#     name="sklearn.neighbors.classification.KNeighborsClassifier(my_name)",
-#     class_name="sklearn.neighbors.classification.KNeighborsClassifier",
-#     description="KNeighborsClassifier(algorithm='brute', leaf_size=30, \n"
-#     "metric='minkowski', metric_params=None, n_jobs=-1, \n"
-#     "n_neighbors=5, p=2, weights='uniform')",
-#     model=clf,
-#     components=OrderedDict(),
-#     parameters=OrderedDict(),
-#     parameters_meta_dict=OrderedDict(),
-#     external_version="0.20.0",
-#     tags=["openml_tutorial_knn"],
-#     language="English",
-#     dependencies="sklearn==0.20.0\nnumpy>=1.6.1\nscipy>=0.9",
-# )
-# knn_flow_manual.extension = extension
-# knn_flow_manual = knn_flow_manual.publish(raise_error_if_exists=True)
-# print(f"Manual flow URL: {knn_flow_manual.openml_url}")
+knn_flow = openml.flows.OpenMLFlow(
+    # Metadata
+    model=clf,  # or None, if you do not want to upload the model object.
+    name="CustomKNeighborsClassifier",
+    description="A custom KNeighborsClassifier flow for OpenML.",
+    external_version=f"{sklearn.__version__}",
+    language="English",
+    tags=["openml_tutorial_knn"],
+    dependencies=f"{sklearn.__version__}",
+    # Hyperparameters
+    parameters={k: str(v) for k, v in knn_parameters.items()},
+    parameters_meta_info={
+        "n_neighbors": {"description": "number of neighbors to use", "data_type": "int"}
+    },
+    # If you have a pipeline with subcomponents, such as preprocessing, add them here.
+    components={},
+)
+knn_flow.publish()
+print(f"knn_flow was published with the ID {knn_flow.flow_id}")
 
 # %% [markdown]
 # Now we'll use the auto-published flow to create and upload a run.