openml · anvithagowda098 · Nov 16, 2025 · Nov 26, 2025 · Nov 26, 2025
diff --git a/docs/details.md b/docs/details.md
@@ -5,17 +5,17 @@ This document highlights some of the more advanced features of
 
 ## Configuration
 
-The configuration file resides in a directory `.config/openml` in the
+The configuration file resides in a directory `~/.config/openml` in the
 home directory of the user and is called config (More specifically, it
-resides in the [configuration directory specified by the XDGB Base
+resides in the [configuration directory specified by the XDG Base
 Directory
 Specification](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html)).
 It consists of `key = value` pairs which are separated by newlines. The
 following keys are defined:
 
 - apikey: required to access the server.
-- server: the server to connect to (default: `http://www.openml.org`).
-          For connection to the test server, set this to `test.openml.org`.
+- server: the server to connect to (default: `https://www.openml.org`).
+          For connection to the test server, set this to `https://test.openml.org`.
 - cachedir: the root folder where the cache file directories should be created.
     If not given, will default to `~/.openml/cache`
 - avoid_duplicate_runs: if set to `True` (default), when certain functions

diff --git a/docs/extensions.md b/docs/extensions.md
@@ -27,8 +27,8 @@ to upload it. However, in order to simplify the process of uploading
 flows and runs from a specific library, an additional interface can be
 built. The OpenML-Python team does not have the capacity to develop and
 maintain such interfaces on its own. For this reason, we have built an
-extension interface to allows others to contribute back. Building a
-suitable extension for therefore requires an understanding of the
+extension interface to allow others to contribute back. Building a
+suitable extension therefore requires an understanding of the
 current OpenML-Python support.
 
 [This tutorial](../examples/Basics/simple_flows_and_runs_tutorial) shows how the scikit-learn 
@@ -65,8 +65,8 @@ and can be implemented in several lines of code.
         Typically, the flow-dependency field is used to check whether
         the specific library is present, and no unknown libraries are
         present there.
-    -   `can_handle_model`: Similar as
-        `can_handle_flow`:, except that in
+    -   `can_handle_model`: Similar to
+        `can_handle_flow`, except that in
         this case a Python object is given. As such, in many cases, this
         method can be implemented by checking whether this adheres to a
         certain base class.
@@ -138,7 +138,7 @@ Each extension created should be a stand-alone repository, compatible
 with the [OpenML-Python repository](https://github.com/openml/openml-python). 
 The extension repository should work off-the-shelf with *OpenML-Python* installed.
 
-Create a public Github repo with the following directory structure:
+Create a public GitHub repo with the following directory structure:
 
     | [repo name]
     |    |-- [extension name]
@@ -149,7 +149,7 @@ Create a public Github repo with the following directory structure:
 ### Recommended
 
 -   Test cases to keep the extension up to date with the
-    Openml-Python upstream changes.
+    OpenML-Python upstream changes.
 -   Documentation of the extension API, especially if any new
     functionality added to OpenML-Python\'s extension design.
 -   Examples to show how the new extension interfaces and works with

diff --git a/docs/index.md b/docs/index.md
@@ -100,4 +100,4 @@ appreciate a reference to our JMLR-MLOSS paper
 
     Feurer, Matthias, et al. 
     "OpenML-Python: an extensible Python API for OpenML."
-    _Journal of Machine Learning Research_ 22.100 (2021):1−5.
+    _Journal of Machine Learning Research_ 22.100 (2021): 1−5.
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
@@ -458,9 +458,12 @@ def get_dataset(  # noqa: C901, PLR0912
     if cache_format not in ["feather", "pickle"]:
         raise ValueError(
-            "cache_format must be one of 'feather' or 'pickle. "
+            "cache_format must be one of 'feather' or 'pickle'. "
             f"Invalid format specified: {cache_format}",
         )
 
+    if isinstance(dataset_id, str) and dataset_id.strip() == "":
+        raise ValueError("dataset_id cannot be an empty string.")
+
     if isinstance(dataset_id, str):
         try:
             dataset_id = int(dataset_id)

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
@@ -223,7 +223,7 @@ def run_flow_on_task(  # noqa: C901, PLR0912, PLR0915, PLR0913
         Result of the run.
     """
     if flow_tags is not None and not isinstance(flow_tags, list):
-        raise ValueError("flow_tags should be a list")
+        raise ValueError(f"flow_tags should be a list, but received '{type(flow_tags).__name__}'.")
 
     if avoid_duplicate_runs is None:
         avoid_duplicate_runs = openml.config.avoid_duplicate_runs
@@ -242,7 +242,9 @@ def run_flow_on_task(  # noqa: C901, PLR0912, PLR0915, PLR0913
         task, flow = flow, task
 
     if task.task_id is None:
-        raise ValueError("The task should be published at OpenML")
+        raise ValueError(
+            "The task should be published at OpenML. Publish the task to OpenML before running it."
+        )
 
     if flow.model is None:
         flow.model = flow.extension.flow_to_model(flow)
@@ -257,8 +259,11 @@ def run_flow_on_task(  # noqa: C901, PLR0912, PLR0915, PLR0913
         if isinstance(flow.flow_id, int) and flow_id != flow.flow_id:
             if flow_id is not False:
                 raise PyOpenMLError(
-                    f"Local flow_id does not match server flow_id: '{flow.flow_id}' vs '{flow_id}'",
+                    f"Local flow_id '{flow.flow_id}' does not match the flow_id '{flow_id}' found "
+                    "on the OpenML server. Ensure your local object is the most recent version "
+                    "retrieved from OpenML or check that the correct Flow ID was provided."
                 )
+
             raise PyOpenMLError(
                 "Flow does not exist on the server, but 'flow.flow_id' is not None."
             )
@@ -648,7 +653,11 @@ def _calculate_local_measure(  # type: ignore
                 arff_datacontent.append(arff_line)
 
         else:
-            raise TypeError(type(task))
+            raise TypeError(
+                f"Unsupported task type '{type(task).__name__}'. "
+                "Expected one of: OpenMLClassificationTask, OpenMLRegressionTask, "
+                "OpenMLClusteringTask, or OpenMLLearningCurveTask."
+            )
 
         for measure in user_defined_measures_fold:
             if measure not in user_defined_measures_per_fold:
@@ -674,7 +683,9 @@ def _calculate_local_measure(  # type: ignore
     if len(traces) > 0:
         if len(traces) != len(jobs):
             raise ValueError(
-                f"Did not find enough traces (expected {len(jobs)}, found {len(traces)})",
+                "Mismatch in number of HPO traces: "
+                f"expected {len(jobs)}, but found {len(traces)}. "
+                "Check for incomplete model evaluations or worker failures."
             )
 
         trace = OpenMLRunTrace.merge_traces(traces)

diff --git a/tests/files/org/.DS_Store b/tests/files/org/.DS_Store