openml · satvshr · Dec 23, 2025 · Dec 24, 2025 · Dec 24, 2025 · Dec 24, 2025
diff --git a/.gitignore b/.gitignore
@@ -88,6 +88,8 @@ target/
 .idea
 *.swp
 .vscode
+.cursorignore
+.cursorindexingignore
 
 # MYPY
 .mypy_cache
@@ -96,4 +98,7 @@ dmypy.sock
 
 # Tests
 .pytest_cache
-.venv
+.venv
+
+# Ruff
+.ruff_cache/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -7,7 +7,7 @@ files: |
   )/.*\.py$
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.7.3
+    rev: v0.14.10
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix, --no-cache]

diff --git a/examples/Advanced/fetch_evaluations_tutorial.py b/examples/Advanced/fetch_evaluations_tutorial.py
@@ -75,7 +75,7 @@
 
 def plot_cdf(values, metric="predictive_accuracy"):
     max_val = max(values)
-    n, bins, patches = plt.hist(values, density=True, histtype="step", cumulative=True, linewidth=3)
+    _, _, patches = plt.hist(values, density=True, histtype="step", cumulative=True, linewidth=3)
     patches[0].set_xy(patches[0].get_xy()[:-1])
     plt.xlim(max(0, min(values) - 0.1), 1)
     plt.title("CDF")
@@ -116,7 +116,7 @@ def plot_flow_compare(evaluations, top_n=10, metric="predictive_accuracy"):
     for i in range(len(flow_ids)):
         flow_values = evaluations[evaluations.flow_id == flow_ids[i]].value
         df = pd.concat([df, flow_values], ignore_index=True, axis=1)
-    fig, axs = plt.subplots()
+    _, axs = plt.subplots()
     df.boxplot()
     axs.set_title("Boxplot comparing " + metric + " for different flows")
     axs.set_ylabel(metric)
@@ -178,4 +178,4 @@ def plot_flow_compare(evaluations, top_n=10, metric="predictive_accuracy"):
     function="predictive_accuracy", flows=[6767], size=100, parameters_in_separate_columns=True
 )
 
-print(evals_setups.head(10))
+print(evals_setups.head(10))
diff --git a/examples/Basics/introduction_tutorial.py b/examples/Basics/introduction_tutorial.py
@@ -12,7 +12,7 @@
 # For certain functionality, such as uploading tasks or datasets, users have to
 # sign up. Only accessing the data on OpenML does not require an account!
 #
-# If you don’t have an account yet, sign up now.
+# If you don't have an account yet, sign up now.
 # You will receive an API key, which will authenticate you to the server
 # and allow you to download and upload datasets, tasks, runs and flows.
 #
@@ -52,4 +52,4 @@
 # %%
 import openml
 
-openml.config.set_root_cache_directory("YOURDIR")
+openml.config.set_root_cache_directory("YOURDIR")
diff --git a/examples/_external_or_deprecated/2015_neurips_feurer_example.py b/examples/_external_or_deprecated/2015_neurips_feurer_example.py
@@ -13,12 +13,10 @@
 | Matthias Feurer, Aaron Klein, Katharina Eggensperger, Jost Springenberg, Manuel Blum and Frank Hutter
 | In *Advances in Neural Information Processing Systems 28*, 2015
 | Available at https://papers.nips.cc/paper/5872-efficient-and-robust-automated-machine-learning.pdf
-"""  # noqa F401
+"""
 
 # License: BSD 3-Clause
 
-import pandas as pd
-
 import openml
 
 ####################################################################################################
@@ -68,7 +66,7 @@
 
 task_ids = []
 for did in dataset_ids:
-    tasks_ = list(tasks.query("did == {}".format(did)).tid)
+    tasks_ = list(tasks.query(f"did == {did}").tid)
     if len(tasks_) >= 1:  # if there are multiple task, take the one with lowest ID (oldest).
         task_id = min(tasks_)
     else:

diff --git a/examples/_external_or_deprecated/2018_ida_strang_example.py b/examples/_external_or_deprecated/2018_ida_strang_example.py
@@ -17,8 +17,8 @@
 # License: BSD 3-Clause
 
 import matplotlib.pyplot as plt
+
 import openml
-import pandas as pd
 
 ##############################################################################
 # A basic step for each data-mining or machine learning task is to determine
@@ -86,10 +86,9 @@
 def determine_class(val_lin, val_nonlin):
     if val_lin < val_nonlin:
         return class_values[0]
-    elif val_nonlin < val_lin:
+    if val_nonlin < val_lin:
         return class_values[1]
-    else:
-        return class_values[2]
+    return class_values[2]
 
 
 evaluations["class"] = evaluations.apply(

diff --git a/examples/_external_or_deprecated/2018_kdd_rijn_example.py b/examples/_external_or_deprecated/2018_kdd_rijn_example.py
@@ -32,24 +32,24 @@
 
 import sys
 
-if sys.platform == "win32":  # noqa
+if sys.platform == "win32":
     print(
         "The pyrfr library (requirement of fanova) can currently not be installed on Windows systems"
     )
-    exit()
+    sys.exit()
 
 # DEPRECATED EXAMPLE -- Avoid running this code in our CI/CD pipeline
 print("This example is deprecated, remove the `if False` in this code to use it manually.")
 if False:
     import json
+
     import fanova
     import matplotlib.pyplot as plt
     import pandas as pd
     import seaborn as sns
 
     import openml
 
-
     ##############################################################################
     # With the advent of automated machine learning, automated hyperparameter
     # optimization methods are by now routinely used in data mining. However, this
@@ -80,7 +80,7 @@
     # important when it is put on a log-scale. All these simplifications can be
     # addressed by defining a ConfigSpace. For a more elaborated example that uses
     # this, please see:
-    # https://github.com/janvanrijn/openml-pimp/blob/d0a14f3eb480f2a90008889f00041bdccc7b9265/examples/plot/plot_fanova_aggregates.py # noqa F401
+    # https://github.com/janvanrijn/openml-pimp/blob/d0a14f3eb480f2a90008889f00041bdccc7b9265/examples/plot/plot_fanova_aggregates.py
 
     suite = openml.study.get_suite("OpenML100")
     flow_id = 7707
@@ -97,8 +97,7 @@
         if limit_nr_tasks is not None and idx >= limit_nr_tasks:
             continue
         print(
-            "Starting with task %d (%d/%d)"
-            % (task_id, idx + 1, len(suite.tasks) if limit_nr_tasks is None else limit_nr_tasks)
+            f"Starting with task {task_id} ({idx + 1}/{len(suite.tasks) if limit_nr_tasks is None else limit_nr_tasks})"
         )
         # note that we explicitly only include tasks from the benchmark suite that was specified (as per the for-loop)
         evals = openml.evaluations.list_evaluations_setups(
@@ -121,13 +120,13 @@
                 [
                     dict(
                         **{name: json.loads(value) for name, value in setup["parameters"].items()},
-                        **{performance_column: setup[performance_column]}
+                        **{performance_column: setup[performance_column]},
                     )
                     for _, setup in evals.iterrows()
                 ]
             )
         except json.decoder.JSONDecodeError as e:
-            print("Task %d error: %s" % (task_id, e))
+            print(f"Task {task_id} error: {e}")
             continue
         # apply our filters, to have only the setups that comply to the hyperparameters we want
         for filter_key, filter_value in parameter_filters.items():
@@ -156,19 +155,21 @@
             Y=setups_evals[performance_column].to_numpy(),
             n_trees=n_trees,
         )
-        for idx, pname in enumerate(parameter_names):
+        for idx, pname in enumerate(parameter_names):  # noqa: PLW2901
             try:
                 fanova_results.append(
                     {
                         "hyperparameter": pname.split(".")[-1],
-                        "fanova": evaluator.quantify_importance([idx])[(idx,)]["individual importance"],
+                        "fanova": evaluator.quantify_importance([idx])[(idx,)][
+                            "individual importance"
+                        ],
                     }
                 )
             except RuntimeError as e:
                 # functional ANOVA sometimes crashes with a RuntimeError, e.g., on tasks where the performance is constant
                 # for all configurations (there is no variance). We will skip these tasks (like the authors did in the
                 # paper).
-                print("Task %d error: %s" % (task_id, e))
+                print(f"Task {task_id} error: {e}")
                 continue
 
     # transform ``fanova_results`` from a list of dicts into a DataFrame

diff --git a/examples/_external_or_deprecated/2018_neurips_perrone_example.py b/examples/_external_or_deprecated/2018_neurips_perrone_example.py
@@ -27,24 +27,25 @@
 
 # License: BSD 3-Clause
 
-import openml
 import numpy as np
 import pandas as pd
 from matplotlib import pyplot as plt
-from sklearn.pipeline import Pipeline
-from sklearn.impute import SimpleImputer
 from sklearn.compose import ColumnTransformer
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.impute import SimpleImputer
 from sklearn.metrics import mean_squared_error
+from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import OneHotEncoder
-from sklearn.ensemble import RandomForestRegressor
+
+import openml
 
 flow_type = "svm"  # this example will use the smaller svm flow evaluations
 ############################################################################
 # The subsequent functions are defined to fetch tasks, flows, evaluations and preprocess them into
 # a tabular format that can be used to build models.
 
 
-def fetch_evaluations(run_full=False, flow_type="svm", metric="area_under_roc_curve"):
+def fetch_evaluations(run_full=False, flow_type="svm", metric="area_under_roc_curve"):  # noqa: FBT002
     """
     Fetch a list of evaluations based on the flows and tasks used in the experiments.
 
@@ -101,7 +102,10 @@ def fetch_evaluations(run_full=False, flow_type="svm", metric="area_under_roc_cu
 
 
 def create_table_from_evaluations(
-    eval_df, flow_type="svm", run_count=np.iinfo(np.int64).max, task_ids=None
+    eval_df,
+    flow_type="svm",
+    run_count=np.iinfo(np.int64).max,  # noqa: B008
+    task_ids=None,
 ):
     """
     Create a tabular data with its ground truth from a dataframe of evaluations.
@@ -206,7 +210,7 @@ def list_categorical_attributes(flow_type="svm"):
 model.fit(X, y)
 y_pred = model.predict(X)
 
-print("Training RMSE : {:.5}".format(mean_squared_error(y, y_pred)))
+print(f"Training RMSE : {mean_squared_error(y, y_pred):.5}")
 
 
 #############################################################################

diff --git a/examples/_external_or_deprecated/benchmark_with_optunahub.py b/examples/_external_or_deprecated/benchmark_with_optunahub.py
@@ -100,7 +100,7 @@ def objective(trial: optuna.Trial) -> Pipeline:
             run.publish()
 
             logger.log(1, f"Run was uploaded to - {run.openml_url}")
-        except Exception as e:
+        except Exception as e:  # noqa: BLE001
             logger.log(1, f"Could not publish run - {e}")
     else:
         logger.log(

diff --git a/examples/_external_or_deprecated/fetch_runtimes_tutorial.py b/examples/_external_or_deprecated/fetch_runtimes_tutorial.py
@@ -39,17 +39,16 @@
 #
 # * (Case 5) Running models that do not release the Python Global Interpreter Lock (GIL)
 
-import openml
 import numpy as np
-from matplotlib import pyplot as plt
 from joblib.parallel import parallel_backend
-
-from sklearn.naive_bayes import GaussianNB
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.neural_network import MLPClassifier
+from matplotlib import pyplot as plt
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
+from sklearn.naive_bayes import GaussianNB
+from sklearn.neural_network import MLPClassifier
+from sklearn.tree import DecisionTreeClassifier
 
+import openml
 
 # %% [markdown]
 # # Preparing tasks and scikit-learn models
@@ -63,12 +62,7 @@
 # Viewing associated data
 n_repeats, n_folds, n_samples = task.get_split_dimensions()
 print(
-    "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
-        task_id,
-        n_repeats,
-        n_folds,
-        n_samples,
-    )
+    f"Task {task_id}: number of repeats: {n_repeats}, number of folds: {n_folds}, number of samples {n_samples}."
 )
 
 
@@ -101,7 +95,7 @@ def print_compare_runtimes(measures):
 measures = run1.fold_evaluations
 
 print("The timing and performance metrics available: ")
-for key in measures.keys():
+for key in measures:
     print(key)
 print()
 
@@ -206,7 +200,6 @@ def print_compare_runtimes(measures):
 # included in the `wall_clock_time_millis_training` measure recorded.
 
 # %%
-from sklearn.model_selection import GridSearchCV
 
 clf = RandomForestClassifier(n_estimators=10, n_jobs=2)
 
@@ -284,22 +277,18 @@ def print_compare_runtimes(measures):
 
 # %%
 
+
 def extract_refit_time(run, repeat, fold):
-    refit_time = (
+    return (
         run.fold_evaluations["wall_clock_time_millis"][repeat][fold]
         - run.fold_evaluations["wall_clock_time_millis_training"][repeat][fold]
         - run.fold_evaluations["wall_clock_time_millis_testing"][repeat][fold]
     )
-    return refit_time
 
 
 for repeat in range(n_repeats):
     for fold in range(n_folds):
-        print(
-            "Repeat #{}-Fold #{}: {:.4f}".format(
-                repeat, fold, extract_refit_time(run4, repeat, fold)
-            )
-        )
+        print(f"Repeat #{repeat}-Fold #{fold}: {extract_refit_time(run4, repeat, fold):.4f}")
 
 # %% [markdown]
 # Along with the GridSearchCV already used above, we demonstrate how such

diff --git a/examples/_external_or_deprecated/flow_id_tutorial.py b/examples/_external_or_deprecated/flow_id_tutorial.py
@@ -9,7 +9,6 @@
 
 import openml
 
-
 # %% [markdown]
 # .. warning::
 #    .. include:: ../../test_server_usage_warning.txt
@@ -48,7 +47,7 @@
 # %% [markdown]
 # ## 2. Obtaining a flow given its name
 # The schema of a flow is given in XSD (
-# [here](https://github.com/openml/OpenML/blob/master/openml_OS/views/pages/api_new/v1/xsd/openml.implementation.upload.xsd)).  # noqa E501
+# [here](https://github.com/openml/OpenML/blob/master/openml_OS/views/pages/api_new/v1/xsd/openml.implementation.upload.xsd)).
 # Only two fields are required, a unique name, and an external version. While it should be pretty
 # obvious why we need a name, the need for the additional external version information might not
 # be immediately clear. However, this information is very important as it allows to have multiple

diff --git a/examples/_external_or_deprecated/flows_and_runs_tutorial.py b/examples/_external_or_deprecated/flows_and_runs_tutorial.py
@@ -3,8 +3,7 @@
 # This tutorial covers how to train/run a model and how to upload the results.
 
 # %%
-import openml
-from sklearn import compose, ensemble, impute, neighbors, preprocessing, pipeline, tree
+from sklearn import compose, ensemble, impute, neighbors, pipeline, preprocessing, tree
 
 import openml
 

diff --git a/examples/_external_or_deprecated/plot_svm_hyperparameters_tutorial.py b/examples/_external_or_deprecated/plot_svm_hyperparameters_tutorial.py
@@ -2,9 +2,10 @@
 # # Plotting hyperparameter surfaces
 
 # %%
-import openml
 import numpy as np
 
+import openml
+
 # %% [markdown]
 # # First step - obtaining the data
 # First, we need to choose an SVM flow, for example 8353, and a task. Finding the IDs of them are