From 8c04a5496053e44c428f5d091c7c94e4513849a4 Mon Sep 17 00:00:00 2001 From: Omswastik-11 Date: Tue, 23 Dec 2025 23:40:36 +0530 Subject: [PATCH] added top-level convenient getter APIs --- examples/Advanced/tasks_tutorial.py | 29 ++++++++++++++----- examples/Basics/simple_datasets_tutorial.py | 10 +++++-- .../Basics/simple_flows_and_runs_tutorial.py | 7 +++-- examples/Basics/simple_tasks_tutorial.py | 5 +++- openml/__init__.py | 12 ++++++++ tests/test_openml/test_openml.py | 14 +++++++++ 6 files changed, 64 insertions(+), 13 deletions(-) diff --git a/examples/Advanced/tasks_tutorial.py b/examples/Advanced/tasks_tutorial.py index dff7293ad..082baad7b 100644 --- a/examples/Advanced/tasks_tutorial.py +++ b/examples/Advanced/tasks_tutorial.py @@ -24,13 +24,15 @@ # # We will start by simply listing only *supervised classification* tasks. # -# **openml.tasks.list_tasks()** returns a dictionary of dictionaries by default, but we -# request a +# **openml.list_tasks()** (or **openml.tasks.list_tasks()**) returns a dictionary of dictionaries by default, but we request a # [pandas dataframe](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html) # instead to have better visualization capabilities and easier access: # %% -tasks = openml.tasks.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION) +# New: top-level convenience alias +tasks = openml.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION) +# Old path still works: +# tasks = openml.tasks.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION) print(tasks.columns) print(f"First 5 of {len(tasks)} tasks:") print(tasks.head()) @@ -66,7 +68,9 @@ # Similar to listing tasks by task type, we can list tasks by tags: # %% -tasks = openml.tasks.list_tasks(tag="OpenML100") +tasks = openml.list_tasks(tag="OpenML100") +# Old path still works: +# tasks = openml.tasks.list_tasks(tag="OpenML100") print(f"First 5 of {len(tasks)} tasks:") print(tasks.head()) @@ -74,7 +78,9 @@ # Furthermore, we can list tasks 
based on the dataset id: # %% -tasks = openml.tasks.list_tasks(data_id=1471) +tasks = openml.list_tasks(data_id=1471) +# Old path still works: +# tasks = openml.tasks.list_tasks(data_id=1471) print(f"First 5 of {len(tasks)} tasks:") print(tasks.head()) @@ -82,7 +88,9 @@ # In addition, a size limit and an offset can be applied both separately and simultaneously: # %% -tasks = openml.tasks.list_tasks(size=10, offset=50) +tasks = openml.list_tasks(size=10, offset=50) +# Old path still works: +# tasks = openml.tasks.list_tasks(size=10, offset=50) print(tasks) # %% [markdown] @@ -98,7 +106,9 @@ # Finally, it is also possible to list all tasks on OpenML with: # %% -tasks = openml.tasks.list_tasks() +tasks = openml.list_tasks() +# Old path still works: +# tasks = openml.tasks.list_tasks() print(len(tasks)) # %% [markdown] @@ -118,7 +128,10 @@ # %% task_id = 31 -task = openml.tasks.get_task(task_id) +# New: top-level convenience alias +task = openml.get_task(task_id) +# Old path still works: +# task = openml.tasks.get_task(task_id) # %% # Properties of the task are stored as member variables: diff --git a/examples/Basics/simple_datasets_tutorial.py b/examples/Basics/simple_datasets_tutorial.py index 75d36ed0f..a5a1d6b5a 100644 --- a/examples/Basics/simple_datasets_tutorial.py +++ b/examples/Basics/simple_datasets_tutorial.py @@ -14,7 +14,10 @@ # ## List datasets stored on OpenML # %% -datasets_df = openml.datasets.list_datasets() +# New: top-level convenience alias +datasets_df = openml.list_datasets() +# Old path still works for backwards compatibility: +# datasets_df = openml.datasets.list_datasets() print(datasets_df.head(n=10)) # %% [markdown] @@ -22,7 +25,10 @@ # %% # Iris dataset https://www.openml.org/d/61 -dataset = openml.datasets.get_dataset(dataset_id=61) +# New: top-level convenience alias +dataset = openml.get_dataset(dataset_id=61) +# Old path still works: +# dataset = openml.datasets.get_dataset(dataset_id=61) # Print a summary print( diff --git 
a/examples/Basics/simple_flows_and_runs_tutorial.py b/examples/Basics/simple_flows_and_runs_tutorial.py index 41eed9234..7c356ffef 100644 --- a/examples/Basics/simple_flows_and_runs_tutorial.py +++ b/examples/Basics/simple_flows_and_runs_tutorial.py @@ -29,7 +29,10 @@ # NOTE: We are using task 119 from the test server: https://test.openml.org/d/20 # %% -task = openml.tasks.get_task(119) +# New: top-level convenience alias +task = openml.get_task(119) +# Old path still works: +# task = openml.tasks.get_task(119) # Get the data dataset = task.get_dataset() @@ -54,7 +57,7 @@ # %% [markdown] # ## Upload the machine learning experiments to OpenML -# First, create a fow and fill it with metadata about the machine learning model. +# First, create a flow and fill it with metadata about the machine learning model. # %% knn_flow = openml.flows.OpenMLFlow( diff --git a/examples/Basics/simple_tasks_tutorial.py b/examples/Basics/simple_tasks_tutorial.py index 598ce4e71..be82663e3 100644 --- a/examples/Basics/simple_tasks_tutorial.py +++ b/examples/Basics/simple_tasks_tutorial.py @@ -10,7 +10,10 @@ # [supervised classification on credit-g](https://www.openml.org/search?type=task&id=31&source_data.data_id=31): # %% -task = openml.tasks.get_task(31) +# New: top-level convenience alias +task = openml.get_task(31) +# Old path still works: +# task = openml.tasks.get_task(31) # %% [markdown] # Get the dataset and its data from the task. 
diff --git a/openml/__init__.py b/openml/__init__.py index c49505eb9..df779faf9 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -34,9 +34,12 @@ ) from .__version__ import __version__ from .datasets import OpenMLDataFeature, OpenMLDataset +from .datasets.functions import get_dataset, list_datasets from .evaluations import OpenMLEvaluation from .flows import OpenMLFlow +from .flows.functions import get_flow, list_flows from .runs import OpenMLRun +from .runs.functions import get_run, list_runs from .setups import OpenMLParameter, OpenMLSetup from .study import OpenMLBenchmarkSuite, OpenMLStudy from .tasks import ( @@ -48,6 +51,7 @@ OpenMLSupervisedTask, OpenMLTask, ) +from .tasks.functions import get_task, list_tasks def populate_cache( @@ -91,8 +95,12 @@ def populate_cache( __all__ = [ + "get_dataset", + "list_datasets", "OpenMLDataset", "OpenMLDataFeature", + "list_runs", + "get_run", "OpenMLRun", "OpenMLSplit", "OpenMLEvaluation", @@ -104,7 +112,11 @@ def populate_cache( "OpenMLLearningCurveTask", "OpenMLRegressionTask", "OpenMLClassificationTask", + "get_flow", + "list_flows", "OpenMLFlow", + "get_task", + "list_tasks", "OpenMLStudy", "OpenMLBenchmarkSuite", "datasets", diff --git a/tests/test_openml/test_openml.py b/tests/test_openml/test_openml.py index 998046726..de3a8fd2b 100644 --- a/tests/test_openml/test_openml.py +++ b/tests/test_openml/test_openml.py @@ -41,3 +41,17 @@ def test_populate_cache( assert task_mock.call_count == 2 for argument, fixture in zip(task_mock.call_args_list, [(1,), (2,)]): assert argument[0] == fixture + + def test_top_level_getters_aliases(self): + # Ensure top-level convenience aliases point to existing implementations. 
+ assert openml.list_datasets is openml.datasets.functions.list_datasets + assert openml.get_dataset is openml.datasets.functions.get_dataset + + assert openml.list_flows is openml.flows.functions.list_flows + assert openml.get_flow is openml.flows.functions.get_flow + + assert openml.list_runs is openml.runs.functions.list_runs + assert openml.get_run is openml.runs.functions.get_run + + assert openml.list_tasks is openml.tasks.functions.list_tasks + assert openml.get_task is openml.tasks.functions.get_task