From bb4554f441cb31fadb848c4e1645bdea6a0b99bf Mon Sep 17 00:00:00 2001
From: Omswastik-11 <omswastikpanda11@gmail.com>
Date: Wed, 24 Dec 2025 00:18:32 +0530
Subject: [PATCH] improved the Getter API for users

---
 examples/Advanced/tasks_tutorial.py           | 28 +++++--
 examples/Basics/simple_datasets_tutorial.py   | 12 ++-
 .../Basics/simple_flows_and_runs_tutorial.py  | 15 +++-
 examples/Basics/simple_tasks_tutorial.py      |  5 +-
 openml/__init__.py                            | 75 ++++++++++++++++++-
 tests/test_openml/test_openml.py              | 24 ++++++
 6 files changed, 143 insertions(+), 16 deletions(-)

diff --git a/examples/Advanced/tasks_tutorial.py b/examples/Advanced/tasks_tutorial.py
index dff7293ad..1418aa91c 100644
--- a/examples/Advanced/tasks_tutorial.py
+++ b/examples/Advanced/tasks_tutorial.py
@@ -24,13 +24,15 @@
 #
 # We will start by simply listing only *supervised classification* tasks.
 #
-# **openml.tasks.list_tasks()** returns a dictionary of dictionaries by default, but we
-# request a
+# **openml.list("task")** (or **openml.tasks.list_tasks()**) returns a dictionary of
+# dictionaries by default, but we request a
 # [pandas dataframe](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html)
 # instead to have better visualization capabilities and easier access:
 
 # %%
-tasks = openml.tasks.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION)
+tasks = openml.list("task", task_type=TaskType.SUPERVISED_CLASSIFICATION)
+# Legacy path still works:
+# tasks = openml.tasks.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION)
 print(tasks.columns)
 print(f"First 5 of {len(tasks)} tasks:")
 print(tasks.head())
@@ -66,7 +68,9 @@
 # Similar to listing tasks by task type, we can list tasks by tags:
 
 # %%
-tasks = openml.tasks.list_tasks(tag="OpenML100")
+tasks = openml.list("task", tag="OpenML100")
+# Legacy path still works:
+# tasks = openml.tasks.list_tasks(tag="OpenML100")
 print(f"First 5 of {len(tasks)} tasks:")
 print(tasks.head())
 
@@ -74,7 +78,9 @@
 # Furthermore, we can list tasks based on the dataset id:
 
 # %%
-tasks = openml.tasks.list_tasks(data_id=1471)
+tasks = openml.list("task", data_id=1471)
+# Legacy path still works:
+# tasks = openml.tasks.list_tasks(data_id=1471)
 print(f"First 5 of {len(tasks)} tasks:")
 print(tasks.head())
 
@@ -82,7 +88,9 @@
 # In addition, a size limit and an offset can be applied both separately and simultaneously:
 
 # %%
-tasks = openml.tasks.list_tasks(size=10, offset=50)
+tasks = openml.list("task", size=10, offset=50)
+# Legacy path still works:
+# tasks = openml.tasks.list_tasks(size=10, offset=50)
 print(tasks)
 
 # %% [markdown]
@@ -98,7 +106,9 @@
 # Finally, it is also possible to list all tasks on OpenML with:
 
 # %%
-tasks = openml.tasks.list_tasks()
+tasks = openml.list("task")
+# Legacy path still works:
+# tasks = openml.tasks.list_tasks()
 print(len(tasks))
 
 # %% [markdown]
@@ -118,7 +128,9 @@
 
 # %%
 task_id = 31
-task = openml.tasks.get_task(task_id)
+task = openml.get("task", task_id)
+# Legacy path still works:
+# task = openml.tasks.get_task(task_id)
 
 # %%
 # Properties of the task are stored as member variables:
diff --git a/examples/Basics/simple_datasets_tutorial.py b/examples/Basics/simple_datasets_tutorial.py
index 75d36ed0f..6d90c22cb 100644
--- a/examples/Basics/simple_datasets_tutorial.py
+++ b/examples/Basics/simple_datasets_tutorial.py
@@ -14,15 +14,23 @@
 # ## List datasets stored on OpenML
 
 # %%
-datasets_df = openml.datasets.list_datasets()
+datasets_df = openml.list("dataset")
 print(datasets_df.head(n=10))
 
+# Legacy path still works:
+# datasets_df = openml.datasets.list_datasets()
+
 # %% [markdown]
 # ## Download a dataset
 
 # %%
 # Iris dataset https://www.openml.org/d/61
-dataset = openml.datasets.get_dataset(dataset_id=61)
+dataset = openml.get("dataset", 61)
+# You can also fetch by name:
+# dataset = openml.get("dataset", "Fashion-MNIST")
+
+# Legacy path still works:
+# dataset = openml.datasets.get_dataset(dataset_id=61)
 
 # Print a summary
 print(
diff --git a/examples/Basics/simple_flows_and_runs_tutorial.py b/examples/Basics/simple_flows_and_runs_tutorial.py
index 41eed9234..f99685f6d 100644
--- a/examples/Basics/simple_flows_and_runs_tutorial.py
+++ b/examples/Basics/simple_flows_and_runs_tutorial.py
@@ -24,12 +24,25 @@
 # %%
 openml.config.start_using_configuration_for_example()
 
+# %% [markdown]
+# ## Quick: list flows and runs via unified entrypoints
+
+# %%
+flows_df = openml.list("flow", size=3)
+print(flows_df.head())
+
+runs_df = openml.list("run", size=3)
+print(runs_df.head())
+
 # %% [markdown]
 # ## Train a machine learning model and evaluate it
 # NOTE: We are using task 119 from the test server: https://test.openml.org/d/20
 
 # %%
-task = openml.tasks.get_task(119)
+task = openml.get("task", 119)
+
+# Legacy path still works:
+# task = openml.tasks.get_task(119)
 
 # Get the data
 dataset = task.get_dataset()
diff --git a/examples/Basics/simple_tasks_tutorial.py b/examples/Basics/simple_tasks_tutorial.py
index 598ce4e71..0989d3e1d 100644
--- a/examples/Basics/simple_tasks_tutorial.py
+++ b/examples/Basics/simple_tasks_tutorial.py
@@ -10,7 +10,10 @@
 # [supervised classification on credit-g](https://www.openml.org/search?type=task&id=31&source_data.data_id=31):
 
 # %%
-task = openml.tasks.get_task(31)
+task = openml.get("task", 31)
+
+# Legacy path still works:
+# task = openml.tasks.get_task(31)
 
 # %% [markdown]
 # Get the dataset and its data from the task.
diff --git a/openml/__init__.py b/openml/__init__.py
index c49505eb9..81aa7b44a 100644
--- a/openml/__init__.py
+++ b/openml/__init__.py
@@ -18,6 +18,9 @@
 # License: BSD 3-Clause
 from __future__ import annotations
 
+import builtins
+from typing import Any, Callable, Dict
+
 from . import (
     _api_calls,
     config,
@@ -49,12 +52,74 @@
     OpenMLTask,
 )
 
+ListDispatcher = Dict[str, Callable[..., Any]]  # type keyword -> ``list_*`` function
+GetDispatcher = Dict[str, Callable[..., Any]]  # type keyword -> ``get_*`` function
+
+
+def list(object_type: str, /, **kwargs: Any) -> Any:  # noqa: A001
+    """List OpenML objects of one type via the matching ``list_*`` function.
+
+    ``object_type`` is matched case-insensitively against ``"dataset"``, ``"task"``,
+    ``"flow"`` and ``"run"``; ``kwargs`` are forwarded unchanged and the callee's
+    result is returned. Raises ``ValueError`` for any other ``object_type``.
+    """
+    # Built inside the function so each ``list_*`` attribute is resolved at call time.
+    dispatch: ListDispatcher = {
+        "dataset": datasets.functions.list_datasets,
+        "task": tasks.functions.list_tasks,
+        "flow": flows.functions.list_flows,
+        "run": runs.functions.list_runs,
+    }
+    # str() keeps non-string input on the ValueError path, mirroring ``get``.
+    func = dispatch.get(str(object_type).lower())
+    if func is None:
+        raise ValueError(
+            "Unsupported object_type for list; expected one of 'dataset', 'task', 'flow', 'run'.",
+        )
+    return func(**kwargs)
+
+
+def get(object_type_or_name: Any, identifier: Any | None = None, /, **kwargs: Any) -> Any:
+    """Get an OpenML object by type and identifier, or a dataset by name.
+
+    Two arguments: case-insensitive type keyword plus identifier, forwarded to ``get_*``.
+    One string argument: dataset name, forwarded to ``get_dataset``. ``ValueError`` is
+    raised for an unknown type, a bare type keyword, or a lone non-string argument.
+
+    Examples
+    --------
+    openml.get("dataset", 61)
+    openml.get("dataset", "Fashion-MNIST")
+    openml.get("task", 31)
+    openml.get("flow", 10)
+    openml.get("run", 20)
+    openml.get("Fashion-MNIST")  # dataset lookup by name (no type specified)
+    """
+    dispatch: GetDispatcher = {
+        "dataset": datasets.functions.get_dataset,
+        "task": tasks.functions.get_task,
+        "flow": flows.functions.get_flow,
+        "run": runs.functions.get_run,
+    }
+    if identifier is None:
+        if not isinstance(object_type_or_name, str):
+            raise ValueError("Please provide an object_type when identifier is not provided.")
+        if object_type_or_name.lower() in dispatch:  # bare keyword: identifier was forgotten
+            raise ValueError("Please provide an identifier together with the object_type.")
+        return datasets.functions.get_dataset(object_type_or_name, **kwargs)
+    func = dispatch.get(str(object_type_or_name).lower())
+    if func is None:
+        raise ValueError(
+            "Unsupported object_type for get; expected one of 'dataset', 'task', 'flow', 'run'.",
+        )
+    return func(identifier, **kwargs)
+
 
 def populate_cache(
-    task_ids: list[int] | None = None,
-    dataset_ids: list[int | str] | None = None,
-    flow_ids: list[int] | None = None,
-    run_ids: list[int] | None = None,
+    task_ids: builtins.list[int] | None = None,
+    dataset_ids: builtins.list[int | str] | None = None,
+    flow_ids: builtins.list[int] | None = None,
+    run_ids: builtins.list[int] | None = None,
 ) -> None:
     """
     Populate a cache for offline and parallel usage of the OpenML connector.
@@ -91,6 +156,8 @@ def populate_cache(
 
 
 __all__ = [
+    "list",
+    "get",
     "OpenMLDataset",
     "OpenMLDataFeature",
     "OpenMLRun",
diff --git a/tests/test_openml/test_openml.py b/tests/test_openml/test_openml.py
index 998046726..0cd3b8211 100644
--- a/tests/test_openml/test_openml.py
+++ b/tests/test_openml/test_openml.py
@@ -41,3 +41,27 @@ def test_populate_cache(
         assert task_mock.call_count == 2
         for argument, fixture in zip(task_mock.call_args_list, [(1,), (2,)]):
             assert argument[0] == fixture
+
+    @mock.patch("openml.tasks.functions.list_tasks")
+    @mock.patch("openml.datasets.functions.list_datasets")
+    def test_list_dispatch(self, list_datasets_mock, list_tasks_mock):
+        """``openml.list`` forwards keyword arguments to the matching ``list_*``."""
+        openml.list("dataset", output_format="dataframe")
+        list_datasets_mock.assert_called_once_with(output_format="dataframe")
+        openml.list("task", size=5)
+        list_tasks_mock.assert_called_once_with(size=5)
+
+    @mock.patch("openml.tasks.functions.get_task")
+    @mock.patch("openml.datasets.functions.get_dataset")
+    def test_get_dispatch(self, get_dataset_mock, get_task_mock):
+        """``openml.get`` forwards the identifier and kwargs to the matching ``get_*``."""
+        openml.get("dataset", 61)
+        get_dataset_mock.assert_called_with(61)
+        # Extra keyword arguments are passed through after the identifier.
+        openml.get("dataset", "Fashion-MNIST", version=2)
+        get_dataset_mock.assert_called_with("Fashion-MNIST", version=2)
+        # A single string argument is treated as a dataset name.
+        openml.get("Fashion-MNIST")
+        get_dataset_mock.assert_called_with("Fashion-MNIST")
+        openml.get("task", 31)
+        get_task_mock.assert_called_with(31)