From 06b1beb7c969cc46b6aa973e45d61cb1a2b23c86 Mon Sep 17 00:00:00 2001
From: Anvitha <anvitha.gowda098@gmail.com>
Date: Mon, 17 Nov 2025 01:40:55 +0530
Subject: [PATCH 1/3] Fix a few broken links and clean up grammar

---
 docs/details.md    |  8 ++++----
 docs/extensions.md | 12 ++++++------
 docs/index.md      |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/docs/details.md b/docs/details.md
index bf4b0cd2b..c1bb8fa9f 100644
--- a/docs/details.md
+++ b/docs/details.md
@@ -5,17 +5,17 @@ This document highlights some of the more advanced features of
 
 ## Configuration
 
-The configuration file resides in a directory `.config/openml` in the
+The configuration file resides in a directory `~/.config/openml` in the
 home directory of the user and is called config (More specifically, it
-resides in the [configuration directory specified by the XDGB Base
+resides in the [configuration directory specified by the XDG Base
 Directory
 Specification](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html)).
 It consists of `key = value` pairs which are separated by newlines. The
 following keys are defined:
 
 - apikey: required to access the server.
-- server: the server to connect to (default: `http://www.openml.org`).
-          For connection to the test server, set this to `test.openml.org`.
+- server: the server to connect to (default: `https://www.openml.org`).
+          For connection to the test server, set this to `https://test.openml.org`.
 - cachedir: the root folder where the cache file directories should be created.
     If not given, will default to `~/.openml/cache`
 - avoid_duplicate_runs: if set to `True` (default), when certain functions
diff --git a/docs/extensions.md b/docs/extensions.md
index 858447440..e53ade266 100644
--- a/docs/extensions.md
+++ b/docs/extensions.md
@@ -27,8 +27,8 @@ to upload it. However, in order to simplify the process of uploading
 flows and runs from a specific library, an additional interface can be
 built. The OpenML-Python team does not have the capacity to develop and
 maintain such interfaces on its own. For this reason, we have built an
-extension interface to allows others to contribute back. Building a
-suitable extension for therefore requires an understanding of the
+extension interface to allow others to contribute back. Building a
+suitable extension therefore requires an understanding of the
 current OpenML-Python support.
 
 [This tutorial](../examples/Basics/simple_flows_and_runs_tutorial) shows how the scikit-learn 
@@ -65,8 +65,8 @@ and can be implemented in several lines of code.
         Typically, the flow-dependency field is used to check whether
         the specific library is present, and no unknown libraries are
         present there.
-    -   `can_handle_model`: Similar as
-        `can_handle_flow`:, except that in
+    -   `can_handle_model`: Similar to
+        `can_handle_flow`, except that in
         this case a Python object is given. As such, in many cases, this
         method can be implemented by checking whether this adheres to a
         certain base class.
@@ -138,7 +138,7 @@ Each extension created should be a stand-alone repository, compatible
 with the [OpenML-Python repository](https://github.com/openml/openml-python). 
 The extension repository should work off-the-shelf with *OpenML-Python* installed.
 
-Create a public Github repo with the following directory structure:
+Create a public GitHub repo with the following directory structure:
 
     | [repo name]
     |    |-- [extension name]
@@ -149,7 +149,7 @@ Create a public Github repo with the following directory structure:
 ### Recommended
 
 -   Test cases to keep the extension up to date with the
-    Openml-Python upstream changes.
+    OpenML-Python upstream changes.
 -   Documentation of the extension API, especially if any new
     functionality added to OpenML-Python\'s extension design.
 -   Examples to show how the new extension interfaces and works with
diff --git a/docs/index.md b/docs/index.md
index 1058c3956..5a1d698f3 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -100,4 +100,4 @@ appreciate a reference to our JMLR-MLOSS paper
 
     Feurer, Matthias, et al. 
     "OpenML-Python: an extensible Python API for OpenML."
-    _Journal of Machine Learning Research_ 22.100 (2021):1−5.
+    _Journal of Machine Learning Research_ 22.100 (2021): 1−5.

From 674bcaca91d920002e1d975a2b06afb1620bcce6 Mon Sep 17 00:00:00 2001
From: Anvitha <anvitha.gowda098@gmail.com>
Date: Wed, 26 Nov 2025 16:11:17 +0530
Subject: [PATCH 2/3] handling empty strings

---
 openml/datasets/functions.py |   5 ++++-
 tests/files/org/.DS_Store    | Bin 0 -> 6148 bytes
 2 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 tests/files/org/.DS_Store

diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index ac5466a44..ce36a6b59 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -458,9 +458,12 @@ def get_dataset(  # noqa: C901, PLR0912
     if cache_format not in ["feather", "pickle"]:
         raise ValueError(
-            "cache_format must be one of 'feather' or 'pickle. "
+            "cache_format must be one of 'feather' or 'pickle'. "
             f"Invalid format specified: {cache_format}",
         )
 
+    if isinstance(dataset_id, str) and dataset_id.strip() == "":
+        raise ValueError("dataset_id cannot be an empty string.")
+
     if isinstance(dataset_id, str):
         try:
             dataset_id = int(dataset_id)
diff --git a/tests/files/org/.DS_Store b/tests/files/org/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..18ac36a0ad3e2540be1369554de55f12881fa9e4
GIT binary patch
literal 6148
zcmeHKISv9b4733uBpOP}e1RWC2wuPkC_pr@AS9@_;$1wA@zKCS2Mroa&LoZ}QKne0
zMMUS9%}it>A|tq=+-&HY?VES3mk|ZRamGRR*VVb%ADdk#`+dN;LpjPxw$i-tZI4C;
zr~nn90#twsd{=?2ucPsIkL77pfC~J90``3<aKoC|2KuK1gSP;{0m5#WdoKYj768`7
zHV_e*1{D}o%@#w0j(Ewun%D*gT{N2y&6_nl6!qJ2e(`kC8px3fP=RL!`mvl?{a?aA
z^#9K!uBZSN_$vi;uvjhTcv9BZ&f~1s7Wf)&Id`}j=1#%j<rwJY7z-=MQ%{P#Vsq@*
V#5U0Bh&vs~p8?Z_Mg@MYzyr7$6{`RM

literal 0
HcmV?d00001


From 1c783cd8f1eb0c7a770236f6b80707aa2e9ff7b7 Mon Sep 17 00:00:00 2001
From: Anvitha <anvitha.gowda098@gmail.com>
Date: Wed, 26 Nov 2025 16:52:47 +0530
Subject: [PATCH 3/3] improved error messages

---
 openml/runs/functions.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 666b75c37..62557bedd 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -223,7 +223,7 @@ def run_flow_on_task(  # noqa: C901, PLR0912, PLR0915, PLR0913
         Result of the run.
     """
     if flow_tags is not None and not isinstance(flow_tags, list):
-        raise ValueError("flow_tags should be a list")
+        raise ValueError(f"flow_tags should be a list, but received '{type(flow_tags).__name__}'.")
 
     if avoid_duplicate_runs is None:
         avoid_duplicate_runs = openml.config.avoid_duplicate_runs
@@ -242,7 +242,9 @@ def run_flow_on_task(  # noqa: C901, PLR0912, PLR0915, PLR0913
         task, flow = flow, task
 
     if task.task_id is None:
-        raise ValueError("The task should be published at OpenML")
+        raise ValueError(
+            "The task should be published at OpenML. Publish the task to OpenML before running it."
+        )
 
     if flow.model is None:
         flow.model = flow.extension.flow_to_model(flow)
@@ -257,8 +259,11 @@ def run_flow_on_task(  # noqa: C901, PLR0912, PLR0915, PLR0913
         if isinstance(flow.flow_id, int) and flow_id != flow.flow_id:
             if flow_id is not False:
                 raise PyOpenMLError(
-                    f"Local flow_id does not match server flow_id: '{flow.flow_id}' vs '{flow_id}'",
+                    f"Local flow_id '{flow.flow_id}' does not match the flow_id '{flow_id}' found "
+                    "on the OpenML server. Ensure your local object is the most recent version "
+                    "retrieved from OpenML or check that the correct Flow ID was provided."
                 )
+
             raise PyOpenMLError(
                 "Flow does not exist on the server, but 'flow.flow_id' is not None."
             )
@@ -648,7 +653,11 @@ def _calculate_local_measure(  # type: ignore
                 arff_datacontent.append(arff_line)
 
         else:
-            raise TypeError(type(task))
+            raise TypeError(
+                f"Unsupported task type '{type(task).__name__}'. "
+                "Expected one of: OpenMLClassificationTask, OpenMLRegressionTask, "
+                "OpenMLClusteringTask, or OpenMLLearningCurveTask."
+            )
 
         for measure in user_defined_measures_fold:
             if measure not in user_defined_measures_per_fold:
@@ -674,7 +683,9 @@ def _calculate_local_measure(  # type: ignore
     if len(traces) > 0:
         if len(traces) != len(jobs):
             raise ValueError(
-                f"Did not find enough traces (expected {len(jobs)}, found {len(traces)})",
+                "Mismatch in number of HPO traces: "
+                f"Expected {len(jobs)}, but found {len(traces)}. "
+                "Check for incomplete model evaluations or worker failures."
             )
 
         trace = OpenMLRunTrace.merge_traces(traces)