Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 21 additions & 8 deletions examples/Advanced/tasks_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@
#
# We will start by simply listing only *supervised classification* tasks.
#
# **openml.tasks.list_tasks()** returns a dictionary of dictionaries by default, but we
# request a
# **openml.list_tasks()** (or **openml.tasks.list_tasks()**) returns a dictionary of
# dictionaries by default, but we request a
# [pandas dataframe](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html)
# instead to have better visualization capabilities and easier access:

# %%
tasks = openml.tasks.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION)
# New: top-level convenience alias
tasks = openml.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION)
# Old path still works:
# tasks = openml.tasks.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION)
print(tasks.columns)
print(f"First 5 of {len(tasks)} tasks:")
print(tasks.head())
Expand Down Expand Up @@ -66,23 +68,29 @@
# Similar to listing tasks by task type, we can list tasks by tags:

# %%
tasks = openml.tasks.list_tasks(tag="OpenML100")
tasks = openml.list_tasks(tag="OpenML100")
# Old path still works:
# tasks = openml.tasks.list_tasks(tag="OpenML100")
print(f"First 5 of {len(tasks)} tasks:")
print(tasks.head())

# %% [markdown]
# Furthermore, we can list tasks based on the dataset id:

# %%
tasks = openml.tasks.list_tasks(data_id=1471)
tasks = openml.list_tasks(data_id=1471)
# Old path still works:
# tasks = openml.tasks.list_tasks(data_id=1471)
print(f"First 5 of {len(tasks)} tasks:")
print(tasks.head())

# %% [markdown]
# In addition, a size limit and an offset can be applied both separately and simultaneously:

# %%
tasks = openml.tasks.list_tasks(size=10, offset=50)
tasks = openml.list_tasks(size=10, offset=50)
# Old path still works:
# tasks = openml.tasks.list_tasks(size=10, offset=50)
print(tasks)

# %% [markdown]
Expand All @@ -98,7 +106,9 @@
# Finally, it is also possible to list all tasks on OpenML with:

# %%
tasks = openml.tasks.list_tasks()
tasks = openml.list_tasks()
# Old path still works:
# tasks = openml.tasks.list_tasks()
print(len(tasks))

# %% [markdown]
Expand All @@ -118,7 +128,10 @@

# %%
task_id = 31
task = openml.tasks.get_task(task_id)
# New: top-level convenience alias
task = openml.get_task(task_id)
# Old path still works:
# task = openml.tasks.get_task(task_id)

# %%
# Properties of the task are stored as member variables:
Expand Down
10 changes: 8 additions & 2 deletions examples/Basics/simple_datasets_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,21 @@
# ## List datasets stored on OpenML

# %%
datasets_df = openml.datasets.list_datasets()
# New: top-level convenience alias
datasets_df = openml.list_datasets()
# Old path still works for backwards compatibility:
# datasets_df = openml.datasets.list_datasets()
print(datasets_df.head(n=10))

# %% [markdown]
# ## Download a dataset

# %%
# Iris dataset https://www.openml.org/d/61
dataset = openml.datasets.get_dataset(dataset_id=61)
# New: top-level convenience alias
dataset = openml.get_dataset(dataset_id=61)
# Old path still works:
# dataset = openml.datasets.get_dataset(dataset_id=61)

# Print a summary
print(
Expand Down
7 changes: 5 additions & 2 deletions examples/Basics/simple_flows_and_runs_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@
# NOTE: We are using task 119 from the test server: https://test.openml.org/d/20

# %%
task = openml.tasks.get_task(119)
# New: top-level convenience alias
task = openml.get_task(119)
# Old path still works:
# task = openml.tasks.get_task(119)

# Get the data
dataset = task.get_dataset()
Expand All @@ -54,7 +57,7 @@

# %% [markdown]
# ## Upload the machine learning experiments to OpenML
# First, create a fow and fill it with metadata about the machine learning model.
# First, create a flow and fill it with metadata about the machine learning model.

# %%
knn_flow = openml.flows.OpenMLFlow(
Expand Down
5 changes: 4 additions & 1 deletion examples/Basics/simple_tasks_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
# [supervised classification on credit-g](https://www.openml.org/search?type=task&id=31&source_data.data_id=31):

# %%
task = openml.tasks.get_task(31)
# New: top-level convenience alias
task = openml.get_task(31)
# Old path still works:
# task = openml.tasks.get_task(31)

# %% [markdown]
# Get the dataset and its data from the task.
Expand Down
12 changes: 12 additions & 0 deletions openml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,12 @@
)
from .__version__ import __version__
from .datasets import OpenMLDataFeature, OpenMLDataset
from .datasets.functions import get_dataset, list_datasets
from .evaluations import OpenMLEvaluation
from .flows import OpenMLFlow
from .flows.functions import get_flow, list_flows
from .runs import OpenMLRun
from .runs.functions import get_run, list_runs
from .setups import OpenMLParameter, OpenMLSetup
from .study import OpenMLBenchmarkSuite, OpenMLStudy
from .tasks import (
Expand All @@ -48,6 +51,7 @@
OpenMLSupervisedTask,
OpenMLTask,
)
from .tasks.functions import get_task, list_tasks


def populate_cache(
Expand Down Expand Up @@ -91,8 +95,12 @@ def populate_cache(


__all__ = [
"get_dataset",
"list_datasets",
"OpenMLDataset",
"OpenMLDataFeature",
"list_runs",
"get_run",
"OpenMLRun",
"OpenMLSplit",
"OpenMLEvaluation",
Expand All @@ -104,7 +112,11 @@ def populate_cache(
"OpenMLLearningCurveTask",
"OpenMLRegressionTask",
"OpenMLClassificationTask",
"get_flow",
"list_flows",
"OpenMLFlow",
"get_task",
"list_tasks",
"OpenMLStudy",
"OpenMLBenchmarkSuite",
"datasets",
Expand Down
14 changes: 14 additions & 0 deletions tests/test_openml/test_openml.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,17 @@ def test_populate_cache(
assert task_mock.call_count == 2
for argument, fixture in zip(task_mock.call_args_list, [(1,), (2,)]):
assert argument[0] == fixture

def test_top_level_getters_aliases(self):
    """Check that every top-level ``openml`` getter is its submodule function.

    Identity (``is``) is asserted rather than equality, so each top-level
    name has to be the very same object as the implementation found in
    ``openml.<entity>s.functions``.
    """
    # Each entity exposes a ``list_<entity>s`` / ``get_<entity>`` pair.
    for entity in ("dataset", "flow", "run", "task"):
        implementations = getattr(openml, f"{entity}s").functions
        for alias in (f"list_{entity}s", f"get_{entity}"):
            assert getattr(openml, alias) is getattr(implementations, alias)