From 06b1beb7c969cc46b6aa973e45d61cb1a2b23c86 Mon Sep 17 00:00:00 2001 From: Anvitha Date: Mon, 17 Nov 2025 01:40:55 +0530 Subject: [PATCH 1/3] fixed few broken links and cleaned up grammar --- docs/details.md | 8 ++++---- docs/extensions.md | 12 ++++++------ docs/index.md | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/details.md b/docs/details.md index bf4b0cd2b..c1bb8fa9f 100644 --- a/docs/details.md +++ b/docs/details.md @@ -5,17 +5,17 @@ This document highlights some of the more advanced features of ## Configuration -The configuration file resides in a directory `.config/openml` in the +The configuration file resides in a directory `~/.config/openml` in the home directory of the user and is called config (More specifically, it -resides in the [configuration directory specified by the XDGB Base +resides in the [configuration directory specified by the XDG Base Directory Specification](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html)). It consists of `key = value` pairs which are separated by newlines. The following keys are defined: - apikey: required to access the server. -- server: the server to connect to (default: `http://www.openml.org`). - For connection to the test server, set this to `test.openml.org`. +- server: the server to connect to (default: `https://www.openml.org`). + For connection to the test server, set this to `https://test.openml.org`. - cachedir: the root folder where the cache file directories should be created. If not given, will default to `~/.openml/cache` - avoid_duplicate_runs: if set to `True` (default), when certain functions diff --git a/docs/extensions.md b/docs/extensions.md index 858447440..e53ade266 100644 --- a/docs/extensions.md +++ b/docs/extensions.md @@ -27,8 +27,8 @@ to upload it. However, in order to simplify the process of uploading flows and runs from a specific library, an additional interface can be built. 
The OpenML-Python team does not have the capacity to develop and maintain such interfaces on its own. For this reason, we have built an -extension interface to allows others to contribute back. Building a -suitable extension for therefore requires an understanding of the +extension interface to allow others to contribute back. Building a +suitable extension therefore requires an understanding of the current OpenML-Python support. [This tutorial](../examples/Basics/simple_flows_and_runs_tutorial) shows how the scikit-learn @@ -65,8 +65,8 @@ and can be implemented in several lines of code. Typically, the flow-dependency field is used to check whether the specific library is present, and no unknown libraries are present there. - - `can_handle_model`: Similar as - `can_handle_flow`:, except that in + - `can_handle_model`: Similar to + `can_handle_flow`, except that in this case a Python object is given. As such, in many cases, this method can be implemented by checking whether this adheres to a certain base class. @@ -138,7 +138,7 @@ Each extension created should be a stand-alone repository, compatible with the [OpenML-Python repository](https://github.com/openml/openml-python). The extension repository should work off-the-shelf with *OpenML-Python* installed. -Create a public Github repo with the following directory structure: +Create a public GitHub repo with the following directory structure: | [repo name] | |-- [extension name] @@ -149,7 +149,7 @@ Create a public Github repo with the following directory structure: ### Recommended - Test cases to keep the extension up to date with the - Openml-Python upstream changes. + OpenML-Python upstream changes. - Documentation of the extension API, especially if any new functionality added to OpenML-Python\'s extension design. 
- Examples to show how the new extension interfaces and works with diff --git a/docs/index.md b/docs/index.md index 1058c3956..5a1d698f3 100644 --- a/docs/index.md +++ b/docs/index.md @@ -100,4 +100,4 @@ appreciate a reference to our JMLR-MLOSS paper Feurer, Matthias, et al. "OpenML-Python: an extensible Python API for OpenML." - _Journal of Machine Learning Research_ 22.100 (2021):1−5. + _Journal of Machine Learning Research_ 22.100 (2021): 1−5. From 674bcaca91d920002e1d975a2b06afb1620bcce6 Mon Sep 17 00:00:00 2001 From: Anvitha Date: Wed, 26 Nov 2025 16:11:17 +0530 Subject: [PATCH 2/3] handling empty strings --- openml/datasets/functions.py | 5 ++++- tests/files/org/.DS_Store | Bin 0 -> 6148 bytes 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 tests/files/org/.DS_Store diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index ac5466a44..ce36a6b59 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -458,9 +458,12 @@ def get_dataset( # noqa: C901, PLR0912 if cache_format not in ["feather", "pickle"]: raise ValueError( "cache_format must be one of 'feather' or 'pickle. " - f"Invalid format specified: {cache_format}", + f"Invalid format specified: {cache_format}. 
Expected one of: 'pickle' or 'feather'", ) + if isinstance(dataset_id, str) and dataset_id.strip() == "": + raise ValueError("dataset_id cannot be an empty string.") + if isinstance(dataset_id, str): try: dataset_id = int(dataset_id) diff --git a/tests/files/org/.DS_Store b/tests/files/org/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..18ac36a0ad3e2540be1369554de55f12881fa9e4 GIT binary patch literal 6148 zcmeHKISv9b4733uBpOP}e1RWC2wuPkC_pr@AS9@_;$1wA@zKCS2Mroa&LoZ}QKne0 zMMUS9%}it>A|tq=+-&HY?VES3mk|ZRamGRR*VVb%ADdk#`+dN;LpjPxw$i-tZI4C; zr~nn90#twsd{=?2ucPsIkL77pfC~J90``3 Date: Wed, 26 Nov 2025 16:52:47 +0530 Subject: [PATCH 3/3] improved error messages --- openml/runs/functions.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 666b75c37..62557bedd 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -223,7 +223,7 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913 Result of the run. """ if flow_tags is not None and not isinstance(flow_tags, list): - raise ValueError("flow_tags should be a list") + raise ValueError(f"flow_tags should be a list, but received '{type(flow_tags).__name__}'.") if avoid_duplicate_runs is None: avoid_duplicate_runs = openml.config.avoid_duplicate_runs @@ -242,7 +242,9 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913 task, flow = flow, task if task.task_id is None: - raise ValueError("The task should be published at OpenML") + raise ValueError( + "The task should be published at OpenML. " "Publish the task to OpenML before running it." 
+ ) if flow.model is None: flow.model = flow.extension.flow_to_model(flow) @@ -257,8 +259,11 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913 if isinstance(flow.flow_id, int) and flow_id != flow.flow_id: if flow_id is not False: raise PyOpenMLError( - f"Local flow_id does not match server flow_id: '{flow.flow_id}' vs '{flow_id}'", + f"Local flow_id '{flow.flow_id}' does not match the flow_id '{flow_id}' found " + "on the OpenML server. Ensure your local object is the most recent version " + "retrieved from OpenML or check that the correct Flow ID was provided." ) + raise PyOpenMLError( "Flow does not exist on the server, but 'flow.flow_id' is not None." ) @@ -648,7 +653,11 @@ def _calculate_local_measure( # type: ignore arff_datacontent.append(arff_line) else: - raise TypeError(type(task)) + raise TypeError( + f"Unsupported task type '{type(task).__name__}'. " + "Expected one of: OpenMLClassificationTask, OpenMLRegressionTask, " + "OpenMLClusteringTask, or OpenMLLearningCurveTask." + ) for measure in user_defined_measures_fold: if measure not in user_defined_measures_per_fold: @@ -674,7 +683,9 @@ def _calculate_local_measure( # type: ignore if len(traces) > 0: if len(traces) != len(jobs): raise ValueError( - f"Did not find enough traces (expected {len(jobs)}, found {len(traces)})", + "Mismatch in number of HPO traces: " + f"Expected {len(jobs)}, but only {len(traces)} were found. " + "Check for incomplete model evaluations or worker failures." ) trace = OpenMLRunTrace.merge_traces(traces)