Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 9 additions & 43 deletions examples/Basics/simple_flows_and_runs_tutorial.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
# %% [markdown]
# A simple tutorial on how to upload results from a machine learning experiment to OpenML.

# %%
import sklearn
from sklearn.neighbors import KNeighborsClassifier

import openml

from openml_sklearn import SklearnExtension
# %% [markdown]
# <div class="admonition warning">
# <p class="admonition-title">Warning</p>
Expand All @@ -20,68 +18,39 @@
# OpenML-Python API.
# </p>
# </div>

# %%
# Paired with the stop_using_configuration_for_example() call at the end of the
# script. Presumably this points the client at the test server mentioned in the
# NOTE below -- TODO confirm.
openml.config.start_using_configuration_for_example()

# %% [markdown]
# ## Train a machine learning model and evaluate it
# NOTE: We are using task 119 from the test server: https://test.openml.org/d/20

# %%
# The task ID is hard-coded; see the NOTE above.
task = openml.tasks.get_task(119)

# Get the data
dataset = task.get_dataset()
# Only X and y are used in the lines visible here; categorical_indicator and
# attribute_names are unpacked but not referenced again in this view.
X, y, categorical_indicator, attribute_names = dataset.get_data(
target=dataset.default_target_attribute
)

# Get the holdout split from the task
train_indices, test_indices = task.get_train_test_split_indices(fold=0, repeat=0)
# The split indices are applied positionally (.iloc) to both features and target.
X_train, X_test = X.iloc[train_indices], X.iloc[test_indices]
y_train, y_test = y.iloc[train_indices], y.iloc[test_indices]

# knn_parameters is also read further down when the flow and run parameters
# are formatted, so it is not local to the classifier construction.
knn_parameters = {
"n_neighbors": 3,
}
# NOTE(review): clf is assigned twice in a row. Both statements build a
# KNeighborsClassifier with n_neighbors=3, and the second replaces the first
# before it is ever used. This looks like the old and new side of a diff --
# confirm which single line should remain.
clf = KNeighborsClassifier(**knn_parameters)
clf = KNeighborsClassifier(n_neighbors=3)
clf.fit(X_train, y_train)

# Get experiment results
# Both the hard predictions and the per-class probabilities are computed on
# the held-out rows selected above.
y_pred = clf.predict(X_test)
y_pred_proba = clf.predict_proba(X_test)

# %% [markdown]
# ## Upload the machine learning experiments to OpenML
# First, create a flow and fill it with metadata about the machine learning model.

# Create a flow from the trained model using the sklearn extension.
# This automatically extracts all metadata and hyperparameters.
# %%
# NOTE(review): knn_flow is assigned twice below. The hand-built OpenMLFlow is
# overwritten by extension.model_to_flow(clf) before publish() is called, so
# only the extension-generated flow is published. This looks like the old and
# new side of a diff -- confirm which construction should remain.
knn_flow = openml.flows.OpenMLFlow(
# Metadata
model=clf, # or None, if you do not want to upload the model object.
name="CustomKNeighborsClassifier",
description="A custom KNeighborsClassifier flow for OpenML.",
# The installed scikit-learn version string is used for both
# external_version and dependencies.
external_version=f"{sklearn.__version__}",
language="English",
tags=["openml_tutorial_knn"],
dependencies=f"{sklearn.__version__}",
# Hyperparameters
# Values are converted with str(); keys are taken from knn_parameters as-is.
parameters={k: str(v) for k, v in knn_parameters.items()},
parameters_meta_info={
"n_neighbors": {"description": "number of neighbors to use", "data_type": "int"}
},
# If you have a pipeline with subcomponents, such as preprocessing, add them here.
components={},
)
# Build the flow from the fitted classifier via the sklearn extension.
extension = SklearnExtension()
knn_flow = extension.model_to_flow(clf)
knn_flow.publish()
# flow_id is read after publish(); presumably it is assigned by the server
# during publishing -- TODO confirm.
print(f"knn_flow was published with the ID {knn_flow.flow_id}")

# %% [markdown]
# Second, we create a run to store the results associated with the flow.

# %%

# Format the predictions for OpenML
predictions = []
for test_index, y_true_i, y_pred_i, y_pred_proba_i in zip(
Expand All @@ -98,13 +67,11 @@
proba=dict(zip(task.class_labels, y_pred_proba_i)),
)
)

# Format the parameters for OpenML
# Get parameters from the flow
# Each entry is a dict with the keys "oml:name", "oml:value" and
# "oml:component", where the component is the published flow's ID.
# NOTE(review): the comprehension has two `for` clauses. As written this is a
# nested iteration: the inner clause rebinds k and v, so every item of
# knn_flow.parameters is emitted once per item of knn_parameters. This looks
# like the old and new side of a diff -- confirm which single clause should
# remain.
oml_knn_parameters = [
{"oml:name": k, "oml:value": v, "oml:component": knn_flow.flow_id}
for k, v in knn_parameters.items()
for k, v in knn_flow.parameters.items()
]

knn_run = openml.runs.OpenMLRun(
task_id=task.task_id,
flow_id=knn_flow.flow_id,
Expand All @@ -117,6 +84,5 @@
# publish() returns the run object, which is rebound to knn_run before its
# URL attributes are read.
knn_run = knn_run.publish()
print(f"Run was uploaded to {knn_run.openml_url}")
print(f"The flow can be found at {knn_run.flow.openml_url}")

# %%
# Counterpart of the start_using_configuration_for_example() call near the top
# of the script.
# NOTE(review): the call appears twice in a row. This looks like the old and
# new side of a diff (e.g. an end-of-file newline change) -- confirm that only
# one call is intended.
openml.config.stop_using_configuration_for_example()
openml.config.stop_using_configuration_for_example()