diff --git a/docs/index.md b/docs/index.md index 1058c3956..ca06267f8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -23,6 +23,15 @@ dataset = openml.datasets.get_dataset("credit-g") # or by ID get_dataset(31) X, y, categorical_indicator, attribute_names = dataset.get_data(target="class") ``` +Get a missing-value summary for a dataset: + +```python +import openml + +dataset = openml.datasets.get_dataset(31) +summary = dataset.get_missing_summary() +``` + Get a [task](https://docs.openml.org/concepts/tasks/) for [supervised classification on credit-g](https://www.openml.org/search?type=task&id=31&source_data.data_id=31): ```python diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index fa83d2b8a..cc82d87d1 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -794,6 +794,26 @@ def get_data( # noqa: C901 assert isinstance(y, pd.Series) return x, y, categorical_mask, attribute_names + def get_missing_summary(self) -> dict: + """Return a summary of the missing values in the dataset. + + Returns + ------- + dict + Dictionary with two keys: ``"n_missing_total"``, the total + number of missing values in the dataset, and + ``"missing_per_column"``, a mapping from column name to the + number of missing values in that column. + """ + df, _, _, _ = self.get_data() + missing_per_column = df.isna().sum().to_dict() + n_missing_total = sum(missing_per_column.values()) + + return { + "n_missing_total": n_missing_total, + "missing_per_column": missing_per_column, + } + def _load_features(self) -> None: """Load the features metadata from the server and store it in the dataset object.""" # Delayed Import to avoid circular imports or having to import all of dataset.functions to diff --git a/openml/tests.test_datasets.test_dataset.OpenMLDatasetTest.test_get_data_pandas/org/openml/www/datasets/40945/dataset_40945.pkl.py3 b/openml/tests.test_datasets.test_dataset.OpenMLDatasetTest.test_get_data_pandas/org/openml/www/datasets/40945/dataset_40945.pkl.py3 new file mode 100644 index 000000000..95d46f371 Binary files /dev/null and b/openml/tests.test_datasets.test_dataset.OpenMLDatasetTest.test_get_data_pandas/org/openml/www/datasets/40945/dataset_40945.pkl.py3 differ diff --git a/openml/tests.test_datasets.test_dataset.OpenMLDatasetTest.test_get_data_pandas/org/openml/www/datasets/40945/dataset_40945.pq b/openml/tests.test_datasets.test_dataset.OpenMLDatasetTest.test_get_data_pandas/org/openml/www/datasets/40945/dataset_40945.pq new file mode 100644 index 000000000..ea361a831 Binary files /dev/null and b/openml/tests.test_datasets.test_dataset.OpenMLDatasetTest.test_get_data_pandas/org/openml/www/datasets/40945/dataset_40945.pq differ diff --git a/openml/tests.test_datasets.test_dataset.OpenMLDatasetTest.test_get_data_pandas/org/openml/www/datasets/40945/description.xml b/openml/tests.test_datasets.test_dataset.OpenMLDatasetTest.test_get_data_pandas/org/openml/www/datasets/40945/description.xml new file mode 100644 index 000000000..59766a659 --- /dev/null +++ b/openml/tests.test_datasets.test_dataset.OpenMLDatasetTest.test_get_data_pandas/org/openml/www/datasets/40945/description.xml @@ -0,0 +1,26 @@ + + 40945 + Titanic + 1 + **Author**: Frank E. Harrell Jr., Thomas Cason +**Source**: [Vanderbilt Biostatistics](http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic.html) +**Please cite**: + +The original Titanic dataset, describing the survival status of individual passengers on the Titanic. The titanic data does not contain information from the crew, but it does contain actual ages of half of the passengers. The principal source for data about Titanic passengers is the Encyclopedia Titanica.
The datasets used here were begun by a variety of researchers. One of the original sources is Eaton & Haas (1994) Titanic: Triumph and Tragedy, Patrick Stephens Ltd, which includes a passenger list created by many researchers and edited by Michael A. Findlay. + +Thomas Cason of UVa has greatly updated and improved the titanic data frame using the Encyclopedia Titanica and created the dataset here. Some duplicate passengers have been dropped, many errors corrected, many missing ages filled in, and new variables created. + +For more information about how this dataset was constructed: +http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic3info.txt + + +### Attribute information + +The variables on our extracted dataset are pclass, survived, name, age, embarked, home.dest, room, ticket, boat, and sex. pclass refers to passenger class (1st, 2nd, 3rd), and is a proxy for socio-economic class. Age is in years, and some infants had fractional values. The titanic2 data frame has no missing data and includes records for the crew, but age is dichotomized at adult vs. child. These data were obtained from Robert Dawson, Saint Mary's University, E-mail. The variables are pclass, age, sex, survived. These data frames are useful for demonstrating many of the functions in Hmisc as well as demonstrating binary logistic regression analysis using the Design library. For more details and references see Simonoff, Jeffrey S (1997): The "unusual episode" and a second statistics course. J Statistics Education, Vol. 5 No. 1. + 10 + ARFF + 2017-10-16T01:17:36 + Public https://api.openml.org/data/v1/download/16826755/Titanic.arff + https://data.openml.org/datasets/0004/40945/dataset_40945.pq 16826755 survived Data ScienceHistoryStatisticstext_data public https://data.openml.org/datasets/0004/40945/dataset_40945.pq active + 2018-10-04 07:19:36 60ac7205eee0ba5045c90b3bba95b1c4 + diff --git a/openml/tests.test_datasets.test_dataset.OpenMLDatasetTest.test_get_data_pandas/org/openml/www/locks/datasets.functions.get_dataset b/openml/tests.test_datasets.test_dataset.OpenMLDatasetTest.test_get_data_pandas/org/openml/www/locks/datasets.functions.get_dataset new file mode 100644 index 000000000..e69de29bb diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/123/description.xml b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/123/description.xml new file mode 100644 index 000000000..5207e21f7 --- /dev/null +++ b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/123/description.xml @@ -0,0 +1,19 @@ + + 123 + quake + 1 + **Author**: +**Source**: Unknown - +**Please cite**: + +Dataset from Smoothing Methods in Statistics + (ftp stat.cmu.edu/datasets) + + Simonoff, J.S. (1996). Smoothing Methods in Statistics. New York: Springer-Verlag. 
+ 1 + ARFF + 2014-04-23T13:17:24 + Public https://test.openml.org/data/v1/download/123/quake.arff + 123 richter 1 public active + 2025-06-16 08:08:53 7ede4fd775db9eae5586b2f55c6d98c6 + diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/20/dataset.arff b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/20/dataset.arff new file mode 100644 index 000000000..a33cbd81f --- /dev/null +++ b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/20/dataset.arff @@ -0,0 +1,863 @@ +% 1. Title: Pima Indians Diabetes Database +% +% 2. Sources: +% (a) Original owners: National Institute of Diabetes and Digestive and +% Kidney Diseases +% (b) Donor of database: Vincent Sigillito (vgs@aplcen.apl.jhu.edu) +% Research Center, RMI Group Leader +% Applied Physics Laboratory +% The Johns Hopkins University +% Johns Hopkins Road +% Laurel, MD 20707 +% (301) 953-6231 +% (c) Date received: 9 May 1990 +% +% 3. Past Usage: +% 1. Smith,~J.~W., Everhart,~J.~E., Dickson,~W.~C., Knowler,~W.~C., \& +% Johannes,~R.~S. (1988). Using the ADAP learning algorithm to forecast +% the onset of diabetes mellitus. In {\it Proceedings of the Symposium +% on Computer Applications and Medical Care} (pp. 261--265). IEEE +% Computer Society Press. +% +% The diagnostic, binary-valued variable investigated is whether the +% patient shows signs of diabetes according to World Health Organization +% criteria (i.e., if the 2 hour post-load plasma glucose was at least +% 200 mg/dl at any survey examination or if found during routine medical +% care). The population lives near Phoenix, Arizona, USA. +% +% Results: Their ADAP algorithm makes a real-valued prediction between +% 0 and 1. This was transformed into a binary decision using a cutoff of +% 0.448. Using 576 training instances, the sensitivity and specificity +% of their algorithm was 76% on the remaining 192 instances. +% +% 4. Relevant Information: +% Several constraints were placed on the selection of these instances from +% a larger database. In particular, all patients here are females at +% least 21 years old of Pima Indian heritage. ADAP is an adaptive learning +% routine that generates and executes digital analogs of perceptron-like +% devices. It is a unique algorithm; see the paper for details. +% +% 5. Number of Instances: 768 +% +% 6. Number of Attributes: 8 plus class +% +% 7. For Each Attribute: (all numeric-valued) +% 1. Number of times pregnant +% 2. Plasma glucose concentration a 2 hours in an oral glucose tolerance test +% 3. Diastolic blood pressure (mm Hg) +% 4. Triceps skin fold thickness (mm) +% 5. 2-Hour serum insulin (mu U/ml) +% 6. Body mass index (weight in kg/(height in m)^2) +% 7. Diabetes pedigree function +% 8. Age (years) +% 9. Class variable (0 or 1) +% +% 8. Missing Attribute Values: None +% +% 9. Class Distribution: (class value 1 is interpreted as "tested positive for +% diabetes") +% +% Class Value Number of instances +% 0 500 +% 1 268 +% +% 10. Brief statistical analysis: +% +% Attribute number: Mean: Standard Deviation: +% 1. 3.8 3.4 +% 2. 120.9 32.0 +% 3. 69.1 19.4 +% 4. 20.5 16.0 +% 5. 79.8 115.2 +% 6. 32.0 7.9 +% 7. 0.5 0.3 +% 8. 
33.2 11.8 +% +% +% +% +% +% +% Relabeled values in attribute 'class' +% From: 0 To: tested_negative +% From: 1 To: tested_positive +% +@relation pima_diabetes +@attribute 'preg' real +@attribute 'plas' real +@attribute 'pres' real +@attribute 'skin' real +@attribute 'insu' real +@attribute 'mass' real +@attribute 'pedi' real +@attribute 'age' real +@attribute 'class' { tested_negative, tested_positive} +@data +6,148,72,35,0,33.6,0.627,50,tested_positive +1,85,66,29,0,26.6,0.351,31,tested_negative +8,183,64,0,0,23.3,0.672,32,tested_positive +1,89,66,23,94,28.1,0.167,21,tested_negative +0,137,40,35,168,43.1,2.288,33,tested_positive +5,116,74,0,0,25.6,0.201,30,tested_negative +3,78,50,32,88,31,0.248,26,tested_positive +10,115,0,0,0,35.3,0.134,29,tested_negative +2,197,70,45,543,30.5,0.158,53,tested_positive +8,125,96,0,0,0,0.232,54,tested_positive +4,110,92,0,0,37.6,0.191,30,tested_negative +10,168,74,0,0,38,0.537,34,tested_positive +10,139,80,0,0,27.1,1.441,57,tested_negative +1,189,60,23,846,30.1,0.398,59,tested_positive +5,166,72,19,175,25.8,0.587,51,tested_positive +7,100,0,0,0,30,0.484,32,tested_positive +0,118,84,47,230,45.8,0.551,31,tested_positive +7,107,74,0,0,29.6,0.254,31,tested_positive +1,103,30,38,83,43.3,0.183,33,tested_negative +1,115,70,30,96,34.6,0.529,32,tested_positive +3,126,88,41,235,39.3,0.704,27,tested_negative +8,99,84,0,0,35.4,0.388,50,tested_negative +7,196,90,0,0,39.8,0.451,41,tested_positive +9,119,80,35,0,29,0.263,29,tested_positive +11,143,94,33,146,36.6,0.254,51,tested_positive +10,125,70,26,115,31.1,0.205,41,tested_positive +7,147,76,0,0,39.4,0.257,43,tested_positive +1,97,66,15,140,23.2,0.487,22,tested_negative +13,145,82,19,110,22.2,0.245,57,tested_negative +5,117,92,0,0,34.1,0.337,38,tested_negative +5,109,75,26,0,36,0.546,60,tested_negative +3,158,76,36,245,31.6,0.851,28,tested_positive +3,88,58,11,54,24.8,0.267,22,tested_negative +6,92,92,0,0,19.9,0.188,28,tested_negative +10,122,78,31,0,27.6,0.512,45,tested_negative +4,103,60,33,192,24,0.966,33,tested_negative +11,138,76,0,0,33.2,0.42,35,tested_negative +9,102,76,37,0,32.9,0.665,46,tested_positive +2,90,68,42,0,38.2,0.503,27,tested_positive +4,111,72,47,207,37.1,1.39,56,tested_positive +3,180,64,25,70,34,0.271,26,tested_negative +7,133,84,0,0,40.2,0.696,37,tested_negative +7,106,92,18,0,22.7,0.235,48,tested_negative +9,171,110,24,240,45.4,0.721,54,tested_positive +7,159,64,0,0,27.4,0.294,40,tested_negative +0,180,66,39,0,42,1.893,25,tested_positive +1,146,56,0,0,29.7,0.564,29,tested_negative +2,71,70,27,0,28,0.586,22,tested_negative +7,103,66,32,0,39.1,0.344,31,tested_positive +7,105,0,0,0,0,0.305,24,tested_negative +1,103,80,11,82,19.4,0.491,22,tested_negative +1,101,50,15,36,24.2,0.526,26,tested_negative +5,88,66,21,23,24.4,0.342,30,tested_negative +8,176,90,34,300,33.7,0.467,58,tested_positive +7,150,66,42,342,34.7,0.718,42,tested_negative +1,73,50,10,0,23,0.248,21,tested_negative +7,187,68,39,304,37.7,0.254,41,tested_positive +0,100,88,60,110,46.8,0.962,31,tested_negative +0,146,82,0,0,40.5,1.781,44,tested_negative +0,105,64,41,142,41.5,0.173,22,tested_negative +2,84,0,0,0,0,0.304,21,tested_negative +8,133,72,0,0,32.9,0.27,39,tested_positive +5,44,62,0,0,25,0.587,36,tested_negative +2,141,58,34,128,25.4,0.699,24,tested_negative +7,114,66,0,0,32.8,0.258,42,tested_positive +5,99,74,27,0,29,0.203,32,tested_negative +0,109,88,30,0,32.5,0.855,38,tested_positive +2,109,92,0,0,42.7,0.845,54,tested_negative +1,95,66,13,38,19.6,0.334,25,tested_negative +4,146,85,27,100,28.9,0.189,27,tested_negative 
+2,100,66,20,90,32.9,0.867,28,tested_positive +5,139,64,35,140,28.6,0.411,26,tested_negative +13,126,90,0,0,43.4,0.583,42,tested_positive +4,129,86,20,270,35.1,0.231,23,tested_negative +1,79,75,30,0,32,0.396,22,tested_negative +1,0,48,20,0,24.7,0.14,22,tested_negative +7,62,78,0,0,32.6,0.391,41,tested_negative +5,95,72,33,0,37.7,0.37,27,tested_negative +0,131,0,0,0,43.2,0.27,26,tested_positive +2,112,66,22,0,25,0.307,24,tested_negative +3,113,44,13,0,22.4,0.14,22,tested_negative +2,74,0,0,0,0,0.102,22,tested_negative +7,83,78,26,71,29.3,0.767,36,tested_negative +0,101,65,28,0,24.6,0.237,22,tested_negative +5,137,108,0,0,48.8,0.227,37,tested_positive +2,110,74,29,125,32.4,0.698,27,tested_negative +13,106,72,54,0,36.6,0.178,45,tested_negative +2,100,68,25,71,38.5,0.324,26,tested_negative +15,136,70,32,110,37.1,0.153,43,tested_positive +1,107,68,19,0,26.5,0.165,24,tested_negative +1,80,55,0,0,19.1,0.258,21,tested_negative +4,123,80,15,176,32,0.443,34,tested_negative +7,81,78,40,48,46.7,0.261,42,tested_negative +4,134,72,0,0,23.8,0.277,60,tested_positive +2,142,82,18,64,24.7,0.761,21,tested_negative +6,144,72,27,228,33.9,0.255,40,tested_negative +2,92,62,28,0,31.6,0.13,24,tested_negative +1,71,48,18,76,20.4,0.323,22,tested_negative +6,93,50,30,64,28.7,0.356,23,tested_negative +1,122,90,51,220,49.7,0.325,31,tested_positive +1,163,72,0,0,39,1.222,33,tested_positive +1,151,60,0,0,26.1,0.179,22,tested_negative +0,125,96,0,0,22.5,0.262,21,tested_negative +1,81,72,18,40,26.6,0.283,24,tested_negative +2,85,65,0,0,39.6,0.93,27,tested_negative +1,126,56,29,152,28.7,0.801,21,tested_negative +1,96,122,0,0,22.4,0.207,27,tested_negative +4,144,58,28,140,29.5,0.287,37,tested_negative +3,83,58,31,18,34.3,0.336,25,tested_negative +0,95,85,25,36,37.4,0.247,24,tested_positive +3,171,72,33,135,33.3,0.199,24,tested_positive +8,155,62,26,495,34,0.543,46,tested_positive +1,89,76,34,37,31.2,0.192,23,tested_negative +4,76,62,0,0,34,0.391,25,tested_negative +7,160,54,32,175,30.5,0.588,39,tested_positive +4,146,92,0,0,31.2,0.539,61,tested_positive +5,124,74,0,0,34,0.22,38,tested_positive +5,78,48,0,0,33.7,0.654,25,tested_negative +4,97,60,23,0,28.2,0.443,22,tested_negative +4,99,76,15,51,23.2,0.223,21,tested_negative +0,162,76,56,100,53.2,0.759,25,tested_positive +6,111,64,39,0,34.2,0.26,24,tested_negative +2,107,74,30,100,33.6,0.404,23,tested_negative +5,132,80,0,0,26.8,0.186,69,tested_negative +0,113,76,0,0,33.3,0.278,23,tested_positive +1,88,30,42,99,55,0.496,26,tested_positive +3,120,70,30,135,42.9,0.452,30,tested_negative +1,118,58,36,94,33.3,0.261,23,tested_negative +1,117,88,24,145,34.5,0.403,40,tested_positive +0,105,84,0,0,27.9,0.741,62,tested_positive +4,173,70,14,168,29.7,0.361,33,tested_positive +9,122,56,0,0,33.3,1.114,33,tested_positive +3,170,64,37,225,34.5,0.356,30,tested_positive +8,84,74,31,0,38.3,0.457,39,tested_negative +2,96,68,13,49,21.1,0.647,26,tested_negative +2,125,60,20,140,33.8,0.088,31,tested_negative +0,100,70,26,50,30.8,0.597,21,tested_negative +0,93,60,25,92,28.7,0.532,22,tested_negative +0,129,80,0,0,31.2,0.703,29,tested_negative +5,105,72,29,325,36.9,0.159,28,tested_negative +3,128,78,0,0,21.1,0.268,55,tested_negative +5,106,82,30,0,39.5,0.286,38,tested_negative +2,108,52,26,63,32.5,0.318,22,tested_negative +10,108,66,0,0,32.4,0.272,42,tested_positive +4,154,62,31,284,32.8,0.237,23,tested_negative +0,102,75,23,0,0,0.572,21,tested_negative +9,57,80,37,0,32.8,0.096,41,tested_negative +2,106,64,35,119,30.5,1.4,34,tested_negative +5,147,78,0,0,33.7,0.218,65,tested_negative 
+2,90,70,17,0,27.3,0.085,22,tested_negative +1,136,74,50,204,37.4,0.399,24,tested_negative +4,114,65,0,0,21.9,0.432,37,tested_negative +9,156,86,28,155,34.3,1.189,42,tested_positive +1,153,82,42,485,40.6,0.687,23,tested_negative +8,188,78,0,0,47.9,0.137,43,tested_positive +7,152,88,44,0,50,0.337,36,tested_positive +2,99,52,15,94,24.6,0.637,21,tested_negative +1,109,56,21,135,25.2,0.833,23,tested_negative +2,88,74,19,53,29,0.229,22,tested_negative +17,163,72,41,114,40.9,0.817,47,tested_positive +4,151,90,38,0,29.7,0.294,36,tested_negative +7,102,74,40,105,37.2,0.204,45,tested_negative +0,114,80,34,285,44.2,0.167,27,tested_negative +2,100,64,23,0,29.7,0.368,21,tested_negative +0,131,88,0,0,31.6,0.743,32,tested_positive +6,104,74,18,156,29.9,0.722,41,tested_positive +3,148,66,25,0,32.5,0.256,22,tested_negative +4,120,68,0,0,29.6,0.709,34,tested_negative +4,110,66,0,0,31.9,0.471,29,tested_negative +3,111,90,12,78,28.4,0.495,29,tested_negative +6,102,82,0,0,30.8,0.18,36,tested_positive +6,134,70,23,130,35.4,0.542,29,tested_positive +2,87,0,23,0,28.9,0.773,25,tested_negative +1,79,60,42,48,43.5,0.678,23,tested_negative +2,75,64,24,55,29.7,0.37,33,tested_negative +8,179,72,42,130,32.7,0.719,36,tested_positive +6,85,78,0,0,31.2,0.382,42,tested_negative +0,129,110,46,130,67.1,0.319,26,tested_positive +5,143,78,0,0,45,0.19,47,tested_negative +5,130,82,0,0,39.1,0.956,37,tested_positive +6,87,80,0,0,23.2,0.084,32,tested_negative +0,119,64,18,92,34.9,0.725,23,tested_negative +1,0,74,20,23,27.7,0.299,21,tested_negative +5,73,60,0,0,26.8,0.268,27,tested_negative +4,141,74,0,0,27.6,0.244,40,tested_negative +7,194,68,28,0,35.9,0.745,41,tested_positive +8,181,68,36,495,30.1,0.615,60,tested_positive +1,128,98,41,58,32,1.321,33,tested_positive +8,109,76,39,114,27.9,0.64,31,tested_positive +5,139,80,35,160,31.6,0.361,25,tested_positive +3,111,62,0,0,22.6,0.142,21,tested_negative +9,123,70,44,94,33.1,0.374,40,tested_negative +7,159,66,0,0,30.4,0.383,36,tested_positive +11,135,0,0,0,52.3,0.578,40,tested_positive +8,85,55,20,0,24.4,0.136,42,tested_negative +5,158,84,41,210,39.4,0.395,29,tested_positive +1,105,58,0,0,24.3,0.187,21,tested_negative +3,107,62,13,48,22.9,0.678,23,tested_positive +4,109,64,44,99,34.8,0.905,26,tested_positive +4,148,60,27,318,30.9,0.15,29,tested_positive +0,113,80,16,0,31,0.874,21,tested_negative +1,138,82,0,0,40.1,0.236,28,tested_negative +0,108,68,20,0,27.3,0.787,32,tested_negative +2,99,70,16,44,20.4,0.235,27,tested_negative +6,103,72,32,190,37.7,0.324,55,tested_negative +5,111,72,28,0,23.9,0.407,27,tested_negative +8,196,76,29,280,37.5,0.605,57,tested_positive +5,162,104,0,0,37.7,0.151,52,tested_positive +1,96,64,27,87,33.2,0.289,21,tested_negative +7,184,84,33,0,35.5,0.355,41,tested_positive +2,81,60,22,0,27.7,0.29,25,tested_negative +0,147,85,54,0,42.8,0.375,24,tested_negative +7,179,95,31,0,34.2,0.164,60,tested_negative +0,140,65,26,130,42.6,0.431,24,tested_positive +9,112,82,32,175,34.2,0.26,36,tested_positive +12,151,70,40,271,41.8,0.742,38,tested_positive +5,109,62,41,129,35.8,0.514,25,tested_positive +6,125,68,30,120,30,0.464,32,tested_negative +5,85,74,22,0,29,1.224,32,tested_positive +5,112,66,0,0,37.8,0.261,41,tested_positive +0,177,60,29,478,34.6,1.072,21,tested_positive +2,158,90,0,0,31.6,0.805,66,tested_positive +7,119,0,0,0,25.2,0.209,37,tested_negative +7,142,60,33,190,28.8,0.687,61,tested_negative +1,100,66,15,56,23.6,0.666,26,tested_negative +1,87,78,27,32,34.6,0.101,22,tested_negative +0,101,76,0,0,35.7,0.198,26,tested_negative 
+3,162,52,38,0,37.2,0.652,24,tested_positive +4,197,70,39,744,36.7,2.329,31,tested_negative +0,117,80,31,53,45.2,0.089,24,tested_negative +4,142,86,0,0,44,0.645,22,tested_positive +6,134,80,37,370,46.2,0.238,46,tested_positive +1,79,80,25,37,25.4,0.583,22,tested_negative +4,122,68,0,0,35,0.394,29,tested_negative +3,74,68,28,45,29.7,0.293,23,tested_negative +4,171,72,0,0,43.6,0.479,26,tested_positive +7,181,84,21,192,35.9,0.586,51,tested_positive +0,179,90,27,0,44.1,0.686,23,tested_positive +9,164,84,21,0,30.8,0.831,32,tested_positive +0,104,76,0,0,18.4,0.582,27,tested_negative +1,91,64,24,0,29.2,0.192,21,tested_negative +4,91,70,32,88,33.1,0.446,22,tested_negative +3,139,54,0,0,25.6,0.402,22,tested_positive +6,119,50,22,176,27.1,1.318,33,tested_positive +2,146,76,35,194,38.2,0.329,29,tested_negative +9,184,85,15,0,30,1.213,49,tested_positive +10,122,68,0,0,31.2,0.258,41,tested_negative +0,165,90,33,680,52.3,0.427,23,tested_negative +9,124,70,33,402,35.4,0.282,34,tested_negative +1,111,86,19,0,30.1,0.143,23,tested_negative +9,106,52,0,0,31.2,0.38,42,tested_negative +2,129,84,0,0,28,0.284,27,tested_negative +2,90,80,14,55,24.4,0.249,24,tested_negative +0,86,68,32,0,35.8,0.238,25,tested_negative +12,92,62,7,258,27.6,0.926,44,tested_positive +1,113,64,35,0,33.6,0.543,21,tested_positive +3,111,56,39,0,30.1,0.557,30,tested_negative +2,114,68,22,0,28.7,0.092,25,tested_negative +1,193,50,16,375,25.9,0.655,24,tested_negative +11,155,76,28,150,33.3,1.353,51,tested_positive +3,191,68,15,130,30.9,0.299,34,tested_negative +3,141,0,0,0,30,0.761,27,tested_positive +4,95,70,32,0,32.1,0.612,24,tested_negative +3,142,80,15,0,32.4,0.2,63,tested_negative +4,123,62,0,0,32,0.226,35,tested_positive +5,96,74,18,67,33.6,0.997,43,tested_negative +0,138,0,0,0,36.3,0.933,25,tested_positive +2,128,64,42,0,40,1.101,24,tested_negative +0,102,52,0,0,25.1,0.078,21,tested_negative +2,146,0,0,0,27.5,0.24,28,tested_positive +10,101,86,37,0,45.6,1.136,38,tested_positive +2,108,62,32,56,25.2,0.128,21,tested_negative +3,122,78,0,0,23,0.254,40,tested_negative +1,71,78,50,45,33.2,0.422,21,tested_negative +13,106,70,0,0,34.2,0.251,52,tested_negative +2,100,70,52,57,40.5,0.677,25,tested_negative +7,106,60,24,0,26.5,0.296,29,tested_positive +0,104,64,23,116,27.8,0.454,23,tested_negative +5,114,74,0,0,24.9,0.744,57,tested_negative +2,108,62,10,278,25.3,0.881,22,tested_negative +0,146,70,0,0,37.9,0.334,28,tested_positive +10,129,76,28,122,35.9,0.28,39,tested_negative +7,133,88,15,155,32.4,0.262,37,tested_negative +7,161,86,0,0,30.4,0.165,47,tested_positive +2,108,80,0,0,27,0.259,52,tested_positive +7,136,74,26,135,26,0.647,51,tested_negative +5,155,84,44,545,38.7,0.619,34,tested_negative +1,119,86,39,220,45.6,0.808,29,tested_positive +4,96,56,17,49,20.8,0.34,26,tested_negative +5,108,72,43,75,36.1,0.263,33,tested_negative +0,78,88,29,40,36.9,0.434,21,tested_negative +0,107,62,30,74,36.6,0.757,25,tested_positive +2,128,78,37,182,43.3,1.224,31,tested_positive +1,128,48,45,194,40.5,0.613,24,tested_positive +0,161,50,0,0,21.9,0.254,65,tested_negative +6,151,62,31,120,35.5,0.692,28,tested_negative +2,146,70,38,360,28,0.337,29,tested_positive +0,126,84,29,215,30.7,0.52,24,tested_negative +14,100,78,25,184,36.6,0.412,46,tested_positive +8,112,72,0,0,23.6,0.84,58,tested_negative +0,167,0,0,0,32.3,0.839,30,tested_positive +2,144,58,33,135,31.6,0.422,25,tested_positive +5,77,82,41,42,35.8,0.156,35,tested_negative +5,115,98,0,0,52.9,0.209,28,tested_positive +3,150,76,0,0,21,0.207,37,tested_negative +2,120,76,37,105,39.7,0.215,29,tested_negative 
+10,161,68,23,132,25.5,0.326,47,tested_positive +0,137,68,14,148,24.8,0.143,21,tested_negative +0,128,68,19,180,30.5,1.391,25,tested_positive +2,124,68,28,205,32.9,0.875,30,tested_positive +6,80,66,30,0,26.2,0.313,41,tested_negative +0,106,70,37,148,39.4,0.605,22,tested_negative +2,155,74,17,96,26.6,0.433,27,tested_positive +3,113,50,10,85,29.5,0.626,25,tested_negative +7,109,80,31,0,35.9,1.127,43,tested_positive +2,112,68,22,94,34.1,0.315,26,tested_negative +3,99,80,11,64,19.3,0.284,30,tested_negative +3,182,74,0,0,30.5,0.345,29,tested_positive +3,115,66,39,140,38.1,0.15,28,tested_negative +6,194,78,0,0,23.5,0.129,59,tested_positive +4,129,60,12,231,27.5,0.527,31,tested_negative +3,112,74,30,0,31.6,0.197,25,tested_positive +0,124,70,20,0,27.4,0.254,36,tested_positive +13,152,90,33,29,26.8,0.731,43,tested_positive +2,112,75,32,0,35.7,0.148,21,tested_negative +1,157,72,21,168,25.6,0.123,24,tested_negative +1,122,64,32,156,35.1,0.692,30,tested_positive +10,179,70,0,0,35.1,0.2,37,tested_negative +2,102,86,36,120,45.5,0.127,23,tested_positive +6,105,70,32,68,30.8,0.122,37,tested_negative +8,118,72,19,0,23.1,1.476,46,tested_negative +2,87,58,16,52,32.7,0.166,25,tested_negative +1,180,0,0,0,43.3,0.282,41,tested_positive +12,106,80,0,0,23.6,0.137,44,tested_negative +1,95,60,18,58,23.9,0.26,22,tested_negative +0,165,76,43,255,47.9,0.259,26,tested_negative +0,117,0,0,0,33.8,0.932,44,tested_negative +5,115,76,0,0,31.2,0.343,44,tested_positive +9,152,78,34,171,34.2,0.893,33,tested_positive +7,178,84,0,0,39.9,0.331,41,tested_positive +1,130,70,13,105,25.9,0.472,22,tested_negative +1,95,74,21,73,25.9,0.673,36,tested_negative +1,0,68,35,0,32,0.389,22,tested_negative +5,122,86,0,0,34.7,0.29,33,tested_negative +8,95,72,0,0,36.8,0.485,57,tested_negative +8,126,88,36,108,38.5,0.349,49,tested_negative +1,139,46,19,83,28.7,0.654,22,tested_negative +3,116,0,0,0,23.5,0.187,23,tested_negative +3,99,62,19,74,21.8,0.279,26,tested_negative +5,0,80,32,0,41,0.346,37,tested_positive +4,92,80,0,0,42.2,0.237,29,tested_negative +4,137,84,0,0,31.2,0.252,30,tested_negative +3,61,82,28,0,34.4,0.243,46,tested_negative +1,90,62,12,43,27.2,0.58,24,tested_negative +3,90,78,0,0,42.7,0.559,21,tested_negative +9,165,88,0,0,30.4,0.302,49,tested_positive +1,125,50,40,167,33.3,0.962,28,tested_positive +13,129,0,30,0,39.9,0.569,44,tested_positive +12,88,74,40,54,35.3,0.378,48,tested_negative +1,196,76,36,249,36.5,0.875,29,tested_positive +5,189,64,33,325,31.2,0.583,29,tested_positive +5,158,70,0,0,29.8,0.207,63,tested_negative +5,103,108,37,0,39.2,0.305,65,tested_negative +4,146,78,0,0,38.5,0.52,67,tested_positive +4,147,74,25,293,34.9,0.385,30,tested_negative +5,99,54,28,83,34,0.499,30,tested_negative +6,124,72,0,0,27.6,0.368,29,tested_positive +0,101,64,17,0,21,0.252,21,tested_negative +3,81,86,16,66,27.5,0.306,22,tested_negative +1,133,102,28,140,32.8,0.234,45,tested_positive +3,173,82,48,465,38.4,2.137,25,tested_positive +0,118,64,23,89,0,1.731,21,tested_negative +0,84,64,22,66,35.8,0.545,21,tested_negative +2,105,58,40,94,34.9,0.225,25,tested_negative +2,122,52,43,158,36.2,0.816,28,tested_negative +12,140,82,43,325,39.2,0.528,58,tested_positive +0,98,82,15,84,25.2,0.299,22,tested_negative +1,87,60,37,75,37.2,0.509,22,tested_negative +4,156,75,0,0,48.3,0.238,32,tested_positive +0,93,100,39,72,43.4,1.021,35,tested_negative +1,107,72,30,82,30.8,0.821,24,tested_negative +0,105,68,22,0,20,0.236,22,tested_negative +1,109,60,8,182,25.4,0.947,21,tested_negative +1,90,62,18,59,25.1,1.268,25,tested_negative 
+1,125,70,24,110,24.3,0.221,25,tested_negative +1,119,54,13,50,22.3,0.205,24,tested_negative +5,116,74,29,0,32.3,0.66,35,tested_positive +8,105,100,36,0,43.3,0.239,45,tested_positive +5,144,82,26,285,32,0.452,58,tested_positive +3,100,68,23,81,31.6,0.949,28,tested_negative +1,100,66,29,196,32,0.444,42,tested_negative +5,166,76,0,0,45.7,0.34,27,tested_positive +1,131,64,14,415,23.7,0.389,21,tested_negative +4,116,72,12,87,22.1,0.463,37,tested_negative +4,158,78,0,0,32.9,0.803,31,tested_positive +2,127,58,24,275,27.7,1.6,25,tested_negative +3,96,56,34,115,24.7,0.944,39,tested_negative +0,131,66,40,0,34.3,0.196,22,tested_positive +3,82,70,0,0,21.1,0.389,25,tested_negative +3,193,70,31,0,34.9,0.241,25,tested_positive +4,95,64,0,0,32,0.161,31,tested_positive +6,137,61,0,0,24.2,0.151,55,tested_negative +5,136,84,41,88,35,0.286,35,tested_positive +9,72,78,25,0,31.6,0.28,38,tested_negative +5,168,64,0,0,32.9,0.135,41,tested_positive +2,123,48,32,165,42.1,0.52,26,tested_negative +4,115,72,0,0,28.9,0.376,46,tested_positive +0,101,62,0,0,21.9,0.336,25,tested_negative +8,197,74,0,0,25.9,1.191,39,tested_positive +1,172,68,49,579,42.4,0.702,28,tested_positive +6,102,90,39,0,35.7,0.674,28,tested_negative +1,112,72,30,176,34.4,0.528,25,tested_negative +1,143,84,23,310,42.4,1.076,22,tested_negative +1,143,74,22,61,26.2,0.256,21,tested_negative +0,138,60,35,167,34.6,0.534,21,tested_positive +3,173,84,33,474,35.7,0.258,22,tested_positive +1,97,68,21,0,27.2,1.095,22,tested_negative +4,144,82,32,0,38.5,0.554,37,tested_positive +1,83,68,0,0,18.2,0.624,27,tested_negative +3,129,64,29,115,26.4,0.219,28,tested_positive +1,119,88,41,170,45.3,0.507,26,tested_negative +2,94,68,18,76,26,0.561,21,tested_negative +0,102,64,46,78,40.6,0.496,21,tested_negative +2,115,64,22,0,30.8,0.421,21,tested_negative +8,151,78,32,210,42.9,0.516,36,tested_positive +4,184,78,39,277,37,0.264,31,tested_positive +0,94,0,0,0,0,0.256,25,tested_negative +1,181,64,30,180,34.1,0.328,38,tested_positive +0,135,94,46,145,40.6,0.284,26,tested_negative +1,95,82,25,180,35,0.233,43,tested_positive +2,99,0,0,0,22.2,0.108,23,tested_negative +3,89,74,16,85,30.4,0.551,38,tested_negative +1,80,74,11,60,30,0.527,22,tested_negative +2,139,75,0,0,25.6,0.167,29,tested_negative +1,90,68,8,0,24.5,1.138,36,tested_negative +0,141,0,0,0,42.4,0.205,29,tested_positive +12,140,85,33,0,37.4,0.244,41,tested_negative +5,147,75,0,0,29.9,0.434,28,tested_negative +1,97,70,15,0,18.2,0.147,21,tested_negative +6,107,88,0,0,36.8,0.727,31,tested_negative +0,189,104,25,0,34.3,0.435,41,tested_positive +2,83,66,23,50,32.2,0.497,22,tested_negative +4,117,64,27,120,33.2,0.23,24,tested_negative +8,108,70,0,0,30.5,0.955,33,tested_positive +4,117,62,12,0,29.7,0.38,30,tested_positive +0,180,78,63,14,59.4,2.42,25,tested_positive +1,100,72,12,70,25.3,0.658,28,tested_negative +0,95,80,45,92,36.5,0.33,26,tested_negative +0,104,64,37,64,33.6,0.51,22,tested_positive +0,120,74,18,63,30.5,0.285,26,tested_negative +1,82,64,13,95,21.2,0.415,23,tested_negative +2,134,70,0,0,28.9,0.542,23,tested_positive +0,91,68,32,210,39.9,0.381,25,tested_negative +2,119,0,0,0,19.6,0.832,72,tested_negative +2,100,54,28,105,37.8,0.498,24,tested_negative +14,175,62,30,0,33.6,0.212,38,tested_positive +1,135,54,0,0,26.7,0.687,62,tested_negative +5,86,68,28,71,30.2,0.364,24,tested_negative +10,148,84,48,237,37.6,1.001,51,tested_positive +9,134,74,33,60,25.9,0.46,81,tested_negative +9,120,72,22,56,20.8,0.733,48,tested_negative +1,71,62,0,0,21.8,0.416,26,tested_negative +8,74,70,40,49,35.3,0.705,39,tested_negative 
+5,88,78,30,0,27.6,0.258,37,tested_negative +10,115,98,0,0,24,1.022,34,tested_negative +0,124,56,13,105,21.8,0.452,21,tested_negative +0,74,52,10,36,27.8,0.269,22,tested_negative +0,97,64,36,100,36.8,0.6,25,tested_negative +8,120,0,0,0,30,0.183,38,tested_positive +6,154,78,41,140,46.1,0.571,27,tested_negative +1,144,82,40,0,41.3,0.607,28,tested_negative +0,137,70,38,0,33.2,0.17,22,tested_negative +0,119,66,27,0,38.8,0.259,22,tested_negative +7,136,90,0,0,29.9,0.21,50,tested_negative +4,114,64,0,0,28.9,0.126,24,tested_negative +0,137,84,27,0,27.3,0.231,59,tested_negative +2,105,80,45,191,33.7,0.711,29,tested_positive +7,114,76,17,110,23.8,0.466,31,tested_negative +8,126,74,38,75,25.9,0.162,39,tested_negative +4,132,86,31,0,28,0.419,63,tested_negative +3,158,70,30,328,35.5,0.344,35,tested_positive +0,123,88,37,0,35.2,0.197,29,tested_negative +4,85,58,22,49,27.8,0.306,28,tested_negative +0,84,82,31,125,38.2,0.233,23,tested_negative +0,145,0,0,0,44.2,0.63,31,tested_positive +0,135,68,42,250,42.3,0.365,24,tested_positive +1,139,62,41,480,40.7,0.536,21,tested_negative +0,173,78,32,265,46.5,1.159,58,tested_negative +4,99,72,17,0,25.6,0.294,28,tested_negative +8,194,80,0,0,26.1,0.551,67,tested_negative +2,83,65,28,66,36.8,0.629,24,tested_negative +2,89,90,30,0,33.5,0.292,42,tested_negative +4,99,68,38,0,32.8,0.145,33,tested_negative +4,125,70,18,122,28.9,1.144,45,tested_positive +3,80,0,0,0,0,0.174,22,tested_negative +6,166,74,0,0,26.6,0.304,66,tested_negative +5,110,68,0,0,26,0.292,30,tested_negative +2,81,72,15,76,30.1,0.547,25,tested_negative +7,195,70,33,145,25.1,0.163,55,tested_positive +6,154,74,32,193,29.3,0.839,39,tested_negative +2,117,90,19,71,25.2,0.313,21,tested_negative +3,84,72,32,0,37.2,0.267,28,tested_negative +6,0,68,41,0,39,0.727,41,tested_positive +7,94,64,25,79,33.3,0.738,41,tested_negative +3,96,78,39,0,37.3,0.238,40,tested_negative +10,75,82,0,0,33.3,0.263,38,tested_negative +0,180,90,26,90,36.5,0.314,35,tested_positive +1,130,60,23,170,28.6,0.692,21,tested_negative +2,84,50,23,76,30.4,0.968,21,tested_negative +8,120,78,0,0,25,0.409,64,tested_negative +12,84,72,31,0,29.7,0.297,46,tested_positive +0,139,62,17,210,22.1,0.207,21,tested_negative +9,91,68,0,0,24.2,0.2,58,tested_negative +2,91,62,0,0,27.3,0.525,22,tested_negative +3,99,54,19,86,25.6,0.154,24,tested_negative +3,163,70,18,105,31.6,0.268,28,tested_positive +9,145,88,34,165,30.3,0.771,53,tested_positive +7,125,86,0,0,37.6,0.304,51,tested_negative +13,76,60,0,0,32.8,0.18,41,tested_negative +6,129,90,7,326,19.6,0.582,60,tested_negative +2,68,70,32,66,25,0.187,25,tested_negative +3,124,80,33,130,33.2,0.305,26,tested_negative +6,114,0,0,0,0,0.189,26,tested_negative +9,130,70,0,0,34.2,0.652,45,tested_positive +3,125,58,0,0,31.6,0.151,24,tested_negative +3,87,60,18,0,21.8,0.444,21,tested_negative +1,97,64,19,82,18.2,0.299,21,tested_negative +3,116,74,15,105,26.3,0.107,24,tested_negative +0,117,66,31,188,30.8,0.493,22,tested_negative +0,111,65,0,0,24.6,0.66,31,tested_negative +2,122,60,18,106,29.8,0.717,22,tested_negative +0,107,76,0,0,45.3,0.686,24,tested_negative +1,86,66,52,65,41.3,0.917,29,tested_negative +6,91,0,0,0,29.8,0.501,31,tested_negative +1,77,56,30,56,33.3,1.251,24,tested_negative +4,132,0,0,0,32.9,0.302,23,tested_positive +0,105,90,0,0,29.6,0.197,46,tested_negative +0,57,60,0,0,21.7,0.735,67,tested_negative +0,127,80,37,210,36.3,0.804,23,tested_negative +3,129,92,49,155,36.4,0.968,32,tested_positive +8,100,74,40,215,39.4,0.661,43,tested_positive +3,128,72,25,190,32.4,0.549,27,tested_positive 
+10,90,85,32,0,34.9,0.825,56,tested_positive +4,84,90,23,56,39.5,0.159,25,tested_negative +1,88,78,29,76,32,0.365,29,tested_negative +8,186,90,35,225,34.5,0.423,37,tested_positive +5,187,76,27,207,43.6,1.034,53,tested_positive +4,131,68,21,166,33.1,0.16,28,tested_negative +1,164,82,43,67,32.8,0.341,50,tested_negative +4,189,110,31,0,28.5,0.68,37,tested_negative +1,116,70,28,0,27.4,0.204,21,tested_negative +3,84,68,30,106,31.9,0.591,25,tested_negative +6,114,88,0,0,27.8,0.247,66,tested_negative +1,88,62,24,44,29.9,0.422,23,tested_negative +1,84,64,23,115,36.9,0.471,28,tested_negative +7,124,70,33,215,25.5,0.161,37,tested_negative +1,97,70,40,0,38.1,0.218,30,tested_negative +8,110,76,0,0,27.8,0.237,58,tested_negative +11,103,68,40,0,46.2,0.126,42,tested_negative +11,85,74,0,0,30.1,0.3,35,tested_negative +6,125,76,0,0,33.8,0.121,54,tested_positive +0,198,66,32,274,41.3,0.502,28,tested_positive +1,87,68,34,77,37.6,0.401,24,tested_negative +6,99,60,19,54,26.9,0.497,32,tested_negative +0,91,80,0,0,32.4,0.601,27,tested_negative +2,95,54,14,88,26.1,0.748,22,tested_negative +1,99,72,30,18,38.6,0.412,21,tested_negative +6,92,62,32,126,32,0.085,46,tested_negative +4,154,72,29,126,31.3,0.338,37,tested_negative +0,121,66,30,165,34.3,0.203,33,tested_positive +3,78,70,0,0,32.5,0.27,39,tested_negative +2,130,96,0,0,22.6,0.268,21,tested_negative +3,111,58,31,44,29.5,0.43,22,tested_negative +2,98,60,17,120,34.7,0.198,22,tested_negative +1,143,86,30,330,30.1,0.892,23,tested_negative +1,119,44,47,63,35.5,0.28,25,tested_negative +6,108,44,20,130,24,0.813,35,tested_negative +2,118,80,0,0,42.9,0.693,21,tested_positive +10,133,68,0,0,27,0.245,36,tested_negative +2,197,70,99,0,34.7,0.575,62,tested_positive +0,151,90,46,0,42.1,0.371,21,tested_positive +6,109,60,27,0,25,0.206,27,tested_negative +12,121,78,17,0,26.5,0.259,62,tested_negative +8,100,76,0,0,38.7,0.19,42,tested_negative +8,124,76,24,600,28.7,0.687,52,tested_positive +1,93,56,11,0,22.5,0.417,22,tested_negative +8,143,66,0,0,34.9,0.129,41,tested_positive +6,103,66,0,0,24.3,0.249,29,tested_negative +3,176,86,27,156,33.3,1.154,52,tested_positive +0,73,0,0,0,21.1,0.342,25,tested_negative +11,111,84,40,0,46.8,0.925,45,tested_positive +2,112,78,50,140,39.4,0.175,24,tested_negative +3,132,80,0,0,34.4,0.402,44,tested_positive +2,82,52,22,115,28.5,1.699,25,tested_negative +6,123,72,45,230,33.6,0.733,34,tested_negative +0,188,82,14,185,32,0.682,22,tested_positive +0,67,76,0,0,45.3,0.194,46,tested_negative +1,89,24,19,25,27.8,0.559,21,tested_negative +1,173,74,0,0,36.8,0.088,38,tested_positive +1,109,38,18,120,23.1,0.407,26,tested_negative +1,108,88,19,0,27.1,0.4,24,tested_negative +6,96,0,0,0,23.7,0.19,28,tested_negative +1,124,74,36,0,27.8,0.1,30,tested_negative +7,150,78,29,126,35.2,0.692,54,tested_positive +4,183,0,0,0,28.4,0.212,36,tested_positive +1,124,60,32,0,35.8,0.514,21,tested_negative +1,181,78,42,293,40,1.258,22,tested_positive +1,92,62,25,41,19.5,0.482,25,tested_negative +0,152,82,39,272,41.5,0.27,27,tested_negative +1,111,62,13,182,24,0.138,23,tested_negative +3,106,54,21,158,30.9,0.292,24,tested_negative +3,174,58,22,194,32.9,0.593,36,tested_positive +7,168,88,42,321,38.2,0.787,40,tested_positive +6,105,80,28,0,32.5,0.878,26,tested_negative +11,138,74,26,144,36.1,0.557,50,tested_positive +3,106,72,0,0,25.8,0.207,27,tested_negative +6,117,96,0,0,28.7,0.157,30,tested_negative +2,68,62,13,15,20.1,0.257,23,tested_negative +9,112,82,24,0,28.2,1.282,50,tested_positive +0,119,0,0,0,32.4,0.141,24,tested_positive +2,112,86,42,160,38.4,0.246,28,tested_negative 
+2,92,76,20,0,24.2,1.698,28,tested_negative +6,183,94,0,0,40.8,1.461,45,tested_negative +0,94,70,27,115,43.5,0.347,21,tested_negative +2,108,64,0,0,30.8,0.158,21,tested_negative +4,90,88,47,54,37.7,0.362,29,tested_negative +0,125,68,0,0,24.7,0.206,21,tested_negative +0,132,78,0,0,32.4,0.393,21,tested_negative +5,128,80,0,0,34.6,0.144,45,tested_negative +4,94,65,22,0,24.7,0.148,21,tested_negative +7,114,64,0,0,27.4,0.732,34,tested_positive +0,102,78,40,90,34.5,0.238,24,tested_negative +2,111,60,0,0,26.2,0.343,23,tested_negative +1,128,82,17,183,27.5,0.115,22,tested_negative +10,92,62,0,0,25.9,0.167,31,tested_negative +13,104,72,0,0,31.2,0.465,38,tested_positive +5,104,74,0,0,28.8,0.153,48,tested_negative +2,94,76,18,66,31.6,0.649,23,tested_negative +7,97,76,32,91,40.9,0.871,32,tested_positive +1,100,74,12,46,19.5,0.149,28,tested_negative +0,102,86,17,105,29.3,0.695,27,tested_negative +4,128,70,0,0,34.3,0.303,24,tested_negative +6,147,80,0,0,29.5,0.178,50,tested_positive +4,90,0,0,0,28,0.61,31,tested_negative +3,103,72,30,152,27.6,0.73,27,tested_negative +2,157,74,35,440,39.4,0.134,30,tested_negative +1,167,74,17,144,23.4,0.447,33,tested_positive +0,179,50,36,159,37.8,0.455,22,tested_positive +11,136,84,35,130,28.3,0.26,42,tested_positive +0,107,60,25,0,26.4,0.133,23,tested_negative +1,91,54,25,100,25.2,0.234,23,tested_negative +1,117,60,23,106,33.8,0.466,27,tested_negative +5,123,74,40,77,34.1,0.269,28,tested_negative +2,120,54,0,0,26.8,0.455,27,tested_negative +1,106,70,28,135,34.2,0.142,22,tested_negative +2,155,52,27,540,38.7,0.24,25,tested_positive +2,101,58,35,90,21.8,0.155,22,tested_negative +1,120,80,48,200,38.9,1.162,41,tested_negative +11,127,106,0,0,39,0.19,51,tested_negative +3,80,82,31,70,34.2,1.292,27,tested_positive +10,162,84,0,0,27.7,0.182,54,tested_negative +1,199,76,43,0,42.9,1.394,22,tested_positive +8,167,106,46,231,37.6,0.165,43,tested_positive +9,145,80,46,130,37.9,0.637,40,tested_positive +6,115,60,39,0,33.7,0.245,40,tested_positive +1,112,80,45,132,34.8,0.217,24,tested_negative +4,145,82,18,0,32.5,0.235,70,tested_positive +10,111,70,27,0,27.5,0.141,40,tested_positive +6,98,58,33,190,34,0.43,43,tested_negative +9,154,78,30,100,30.9,0.164,45,tested_negative +6,165,68,26,168,33.6,0.631,49,tested_negative +1,99,58,10,0,25.4,0.551,21,tested_negative +10,68,106,23,49,35.5,0.285,47,tested_negative +3,123,100,35,240,57.3,0.88,22,tested_negative +8,91,82,0,0,35.6,0.587,68,tested_negative +6,195,70,0,0,30.9,0.328,31,tested_positive +9,156,86,0,0,24.8,0.23,53,tested_positive +0,93,60,0,0,35.3,0.263,25,tested_negative +3,121,52,0,0,36,0.127,25,tested_positive +2,101,58,17,265,24.2,0.614,23,tested_negative +2,56,56,28,45,24.2,0.332,22,tested_negative +0,162,76,36,0,49.6,0.364,26,tested_positive +0,95,64,39,105,44.6,0.366,22,tested_negative +4,125,80,0,0,32.3,0.536,27,tested_positive +5,136,82,0,0,0,0.64,69,tested_negative +2,129,74,26,205,33.2,0.591,25,tested_negative +3,130,64,0,0,23.1,0.314,22,tested_negative +1,107,50,19,0,28.3,0.181,29,tested_negative +1,140,74,26,180,24.1,0.828,23,tested_negative +1,144,82,46,180,46.1,0.335,46,tested_positive +8,107,80,0,0,24.6,0.856,34,tested_negative +13,158,114,0,0,42.3,0.257,44,tested_positive +2,121,70,32,95,39.1,0.886,23,tested_negative +7,129,68,49,125,38.5,0.439,43,tested_positive +2,90,60,0,0,23.5,0.191,25,tested_negative +7,142,90,24,480,30.4,0.128,43,tested_positive +3,169,74,19,125,29.9,0.268,31,tested_positive +0,99,0,0,0,25,0.253,22,tested_negative +4,127,88,11,155,34.5,0.598,28,tested_negative 
+4,118,70,0,0,44.5,0.904,26,tested_negative +2,122,76,27,200,35.9,0.483,26,tested_negative +6,125,78,31,0,27.6,0.565,49,tested_positive +1,168,88,29,0,35,0.905,52,tested_positive +2,129,0,0,0,38.5,0.304,41,tested_negative +4,110,76,20,100,28.4,0.118,27,tested_negative +6,80,80,36,0,39.8,0.177,28,tested_negative +10,115,0,0,0,0,0.261,30,tested_positive +2,127,46,21,335,34.4,0.176,22,tested_negative +9,164,78,0,0,32.8,0.148,45,tested_positive +2,93,64,32,160,38,0.674,23,tested_positive +3,158,64,13,387,31.2,0.295,24,tested_negative +5,126,78,27,22,29.6,0.439,40,tested_negative +10,129,62,36,0,41.2,0.441,38,tested_positive +0,134,58,20,291,26.4,0.352,21,tested_negative +3,102,74,0,0,29.5,0.121,32,tested_negative +7,187,50,33,392,33.9,0.826,34,tested_positive +3,173,78,39,185,33.8,0.97,31,tested_positive +10,94,72,18,0,23.1,0.595,56,tested_negative +1,108,60,46,178,35.5,0.415,24,tested_negative +5,97,76,27,0,35.6,0.378,52,tested_positive +4,83,86,19,0,29.3,0.317,34,tested_negative +1,114,66,36,200,38.1,0.289,21,tested_negative +1,149,68,29,127,29.3,0.349,42,tested_positive +5,117,86,30,105,39.1,0.251,42,tested_negative +1,111,94,0,0,32.8,0.265,45,tested_negative +4,112,78,40,0,39.4,0.236,38,tested_negative +1,116,78,29,180,36.1,0.496,25,tested_negative +0,141,84,26,0,32.4,0.433,22,tested_negative +2,175,88,0,0,22.9,0.326,22,tested_negative +2,92,52,0,0,30.1,0.141,22,tested_negative +3,130,78,23,79,28.4,0.323,34,tested_positive +8,120,86,0,0,28.4,0.259,22,tested_positive +2,174,88,37,120,44.5,0.646,24,tested_positive +2,106,56,27,165,29,0.426,22,tested_negative +2,105,75,0,0,23.3,0.56,53,tested_negative +4,95,60,32,0,35.4,0.284,28,tested_negative +0,126,86,27,120,27.4,0.515,21,tested_negative +8,65,72,23,0,32,0.6,42,tested_negative +2,99,60,17,160,36.6,0.453,21,tested_negative +1,102,74,0,0,39.5,0.293,42,tested_positive +11,120,80,37,150,42.3,0.785,48,tested_positive +3,102,44,20,94,30.8,0.4,26,tested_negative +1,109,58,18,116,28.5,0.219,22,tested_negative +9,140,94,0,0,32.7,0.734,45,tested_positive +13,153,88,37,140,40.6,1.174,39,tested_negative +12,100,84,33,105,30,0.488,46,tested_negative +1,147,94,41,0,49.3,0.358,27,tested_positive +1,81,74,41,57,46.3,1.096,32,tested_negative +3,187,70,22,200,36.4,0.408,36,tested_positive +6,162,62,0,0,24.3,0.178,50,tested_positive +4,136,70,0,0,31.2,1.182,22,tested_positive +1,121,78,39,74,39,0.261,28,tested_negative +3,108,62,24,0,26,0.223,25,tested_negative +0,181,88,44,510,43.3,0.222,26,tested_positive +8,154,78,32,0,32.4,0.443,45,tested_positive +1,128,88,39,110,36.5,1.057,37,tested_positive +7,137,90,41,0,32,0.391,39,tested_negative +0,123,72,0,0,36.3,0.258,52,tested_positive +1,106,76,0,0,37.5,0.197,26,tested_negative +6,190,92,0,0,35.5,0.278,66,tested_positive +2,88,58,26,16,28.4,0.766,22,tested_negative +9,170,74,31,0,44,0.403,43,tested_positive +9,89,62,0,0,22.5,0.142,33,tested_negative +10,101,76,48,180,32.9,0.171,63,tested_negative +2,122,70,27,0,36.8,0.34,27,tested_negative +5,121,72,23,112,26.2,0.245,30,tested_negative +1,126,60,0,0,30.1,0.349,47,tested_positive +1,93,70,31,0,30.4,0.315,23,tested_negative diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/20/dataset.pkl.py3 b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/20/dataset.pkl.py3 new file mode 100644 index 000000000..e22d32d24 Binary files /dev/null and 
b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/20/dataset.pkl.py3 differ diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/20/description.xml b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/20/description.xml new file mode 100644 index 000000000..a8ca546c0 --- /dev/null +++ b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/20/description.xml @@ -0,0 +1,98 @@ + + 20 + diabetes + 1 + **Author**: +**Source**: Unknown - +**Please cite**: + +1. Title: Pima Indians Diabetes Database + + 2. Sources: + (a) Original owners: National Institute of Diabetes and Digestive and + Kidney Diseases + (b) Donor of database: Vincent Sigillito (vgs@aplcen.apl.jhu.edu) + Research Center, RMI Group Leader + Applied Physics Laboratory + The Johns Hopkins University + Johns Hopkins Road + Laurel, MD 20707 + (301) 953-6231 + (c) Date received: 9 May 1990 + + 3. Past Usage: + 1. Smith,~J.~W., Everhart,~J.~E., Dickson,~W.~C., Knowler,~W.~C., & + Johannes,~R.~S. (1988). Using the ADAP learning algorithm to forecast + the onset of diabetes mellitus. In {it Proceedings of the Symposium + on Computer Applications and Medical Care} (pp. 261--265). IEEE + Computer Society Press. + + The diagnostic, binary-valued variable investigated is whether the + patient shows signs of diabetes according to World Health Organization + criteria (i.e., if the 2 hour post-load plasma glucose was at least + 200 mg/dl at any survey examination or if found during routine medical + care). The population lives near Phoenix, Arizona, USA. + + Results: Their ADAP algorithm makes a real-valued prediction between + 0 and 1. This was transformed into a binary decision using a cutoff of + 0.448. Using 576 training instances, the sensitivity and specificity + of their algorithm was 76% on the remaining 192 instances. + + 4. Relevant Information: + Several constraints were placed on the selection of these instances from + a larger database. In particular, all patients here are females at + least 21 years old of Pima Indian heritage. ADAP is an adaptive learning + routine that generates and executes digital analogs of perceptron-like + devices. It is a unique algorithm; see the paper for details. + + 5. Number of Instances: 768 + + 6. Number of Attributes: 8 plus class + + 7. For Each Attribute: (all numeric-valued) + 1. Number of times pregnant + 2. Plasma glucose concentration a 2 hours in an oral glucose tolerance test + 3. Diastolic blood pressure (mm Hg) + 4. Triceps skin fold thickness (mm) + 5. 2-Hour serum insulin (mu U/ml) + 6. Body mass index (weight in kg/(height in m)^2) + 7. Diabetes pedigree function + 8. Age (years) + 9. Class variable (0 or 1) + + 8. Missing Attribute Values: None + + 9. Class Distribution: (class value 1 is interpreted as "tested positive for + diabetes") + + Class Value Number of instances + 0 500 + 1 268 + + 10. Brief statistical analysis: + + Attribute number: Mean: Standard Deviation: + 1. 3.8 3.4 + 2. 120.9 32.0 + 3. 69.1 19.4 + 4. 20.5 16.0 + 5. 79.8 115.2 + 6. 32.0 7.9 + 7. 0.5 0.3 + 8. 
33.2 11.8 + + + + + + + Relabeled values in attribute 'class' + From: 0 To: tested_negative + From: 1 To: tested_positive + 1 + ARFF + 2014-04-06T23:22:13 + Public https://test.openml.org/data/v1/download/20/diabetes.arff + 20 class 1 study_14 public active + 2025-06-16 08:10:37 3cbaa3e54586aa88cf6aacb4033e4470 + diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/20/features.xml b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/20/features.xml new file mode 100644 index 000000000..cfbafaec6 --- /dev/null +++ b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/20/features.xml @@ -0,0 +1,85 @@ + + + 0 + preg + numeric + false + false + false + 0 + + + 1 + plas + numeric + false + false + false + 0 + + + 2 + pres + numeric + false + false + false + 0 + + + 3 + skin + numeric + false + false + false + 0 + + + 4 + insu + numeric + false + false + false + 0 + + + 5 + mass + numeric + false + false + false + 0 + + + 6 + pedi + numeric + false + false + false + 0 + + + 7 + age + numeric + false + false + false + 0 + + + 8 + class + nominal + tested_negative + tested_positive + true + false + false + 0 + + diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/20/features.xml.pkl b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/20/features.xml.pkl new file mode 100644 index 000000000..b6dcf8ef3 Binary files /dev/null and b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/datasets/20/features.xml.pkl differ diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/flows/205905/flow.xml b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/flows/205905/flow.xml new file mode 100644 index 000000000..bda310c1e --- /dev/null +++ b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/flows/205905/flow.xml @@ -0,0 +1,546 @@ + + + 205905 +1159 +TESTa8a9af7f85sklearn.pipeline.Pipeline(cat_handling=sklearn.compose._column_transformer.ColumnTransformer(cat=sklearn.preprocessing._encoders.OneHotEncoder),imp=sklearn.impute._base.SimpleImputer,classifier=sklearn.dummy.DummyClassifier) +sklearn.Pipeline(ColumnTransformer,SimpleImputer,DummyClassifier) +sklearn.pipeline.Pipeline +1 +openml==0.16.0,sklearn==1.7.2 +A sequence of data transformers with an optional final predictor. + +`Pipeline` allows you to sequentially apply a list of transformers to +preprocess the data and, if desired, conclude the sequence with a final +:term:`predictor` for predictive modeling. + +Intermediate steps of the pipeline must be transformers, that is, they +must implement `fit` and `transform` methods. +The final :term:`estimator` only needs to implement `fit`. +The transformers in the pipeline can be cached using ``memory`` argument. + +The purpose of the pipeline is to assemble several steps that can be +cross-validated together while setting different parameters. For this, it +enables setting parameters of the various steps using their names and the +parameter name separated by a `'__'`, as in the example below. 
A step's +estimator may be replaced entirely by setting the parameter with its name +to another estimator, or a transformer removed by setting it to +`'passthrough'` or `None`. + +For an example use case of `Pipeline` combined with +:class:`~s... +2025-11-15T10:47:52 +English +sklearn==1.7.2 +numpy>=1.22.0 +scipy>=1.8.0 +joblib>=1.2.0 +threadpoolctl>=3.1.0 + + memory + str or object with the joblib + null + Used to cache the fitted transformers of the pipeline. The last step + will never be cached, even if it is a transformer. By default, no + caching is performed. If a string is given, it is the path to the + caching directory. Enabling caching triggers a clone of the transformers + before fitting. Therefore, the transformer instance given to the + pipeline cannot be inspected directly. Use the attribute ``named_steps`` + or ``steps`` to inspect estimators within the pipeline. Caching the + transformers is advantageous when fitting is time consuming. See + :ref:`sphx_glr_auto_examples_neighbors_plot_caching_nearest_neighbors.py` + for an example on how to enable caching + + + steps + list of tuples + [{"oml-python:serialized_object": "component_reference", "value": {"key": "cat_handling", "step_name": "cat_handling"}}, {"oml-python:serialized_object": "component_reference", "value": {"key": "imp", "step_name": "imp"}}, {"oml-python:serialized_object": "component_reference", "value": {"key": "classifier", "step_name": "classifier"}}] + List of (name of step, estimator) tuples that are to be chained in + sequential order. To be compatible with the scikit-learn API, all steps + must define `fit`. All non-last steps must also define `transform`. See + :ref:`Combining Estimators <combining_estimators>` for more details + + + transform_input + list of str + null + The names of the :term:`metadata` parameters that should be transformed by the + pipeline before passing it to the step consuming it + + This enables transforming some input arguments to ``fit`` (other than ``X``) + to be transformed by the steps of the pipeline up to the step which requires + them. Requirement is defined via :ref:`metadata routing <metadata_routing>` + For instance, this can be used to pass a validation set through the pipeline + + You can only set this if metadata routing is enabled, which you + can enable using ``sklearn.set_config(enable_metadata_routing=True)`` + + .. versionadded:: 1.6 + + + verbose + bool + false + If True, the time elapsed while fitting each step will be printed as it + is completed. + + + cat_handling + + + 205906 +1159 +TESTa8a9af7f85sklearn.compose._column_transformer.ColumnTransformer(cat=sklearn.preprocessing._encoders.OneHotEncoder) +sklearn.ColumnTransformer +sklearn.compose._column_transformer.ColumnTransformer +1 +openml==0.16.0,sklearn==1.7.2 +Applies transformers to columns of an array or pandas DataFrame. + +This estimator allows different columns or column subsets of the input +to be transformed separately and the features generated by each transformer +will be concatenated to form a single feature space. +This is useful for heterogeneous or columnar data, to combine several +feature extraction mechanisms or transformations into a single transformer. +2025-11-15T10:47:52 +English +sklearn==1.7.2 +numpy>=1.22.0 +scipy>=1.8.0 +joblib>=1.2.0 +threadpoolctl>=3.1.0 + + force_int_remainder_cols + bool + "deprecated" + This parameter has no effect + + .. 
note:: + If you do not access the list of columns for the remainder columns + in the `transformers_` fitted attribute, you do not need to set + this parameter + + .. versionadded:: 1.5 + + .. versionchanged:: 1.7 + The default value for `force_int_remainder_cols` will change from + `True` to `False` in version 1.7 + + .. deprecated:: 1.7 + `force_int_remainder_cols` is deprecated and will be removed in 1.9. + + + n_jobs + int + null + Number of jobs to run in parallel + ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context + ``-1`` means using all processors. See :term:`Glossary <n_jobs>` + for more details + + + remainder + + "passthrough" + + + + sparse_threshold + float + 0.3 + If the output of the different transformers contains sparse matrices, + these will be stacked as a sparse matrix if the overall density is + lower than this value. Use ``sparse_threshold=0`` to always return + dense. When the transformed output consists of all dense data, the + stacked result will be dense, and this keyword will be ignored + + + transformer_weights + dict + null + Multiplicative weights for features per transformer. The output of the + transformer is multiplied by these weights. Keys are transformer names, + values the weights + + + transformers + list of tuples + [{"oml-python:serialized_object": "component_reference", "value": {"key": "cat", "step_name": "cat", "argument_1": {"oml-python:serialized_object": "function", "value": "tests.test_runs.test_run._cat_col_selector"}}}] + List of (name, transformer, columns) tuples specifying the + transformer objects to be applied to subsets of the data + + + verbose + bool + false + If True, the time elapsed while fitting each transformer will be + printed as it is completed + + + verbose_feature_names_out + bool + true + - If True, :meth:`ColumnTransformer.get_feature_names_out` will prefix + all feature names with the name of the transformer that generated that + feature. It is equivalent to setting + `verbose_feature_names_out="{transformer_name}__{feature_name}"` + - If False, :meth:`ColumnTransformer.get_feature_names_out` will not + prefix any feature names and will error if feature names are not + unique + - If ``Callable[[str, str], str]``, + :meth:`ColumnTransformer.get_feature_names_out` will rename all the features + using the name of the transformer. The first argument of the callable is the + transformer name and the second argument is the feature name. The returned + string will be the new feature name + - If ``str``, it must be a string ready for formatting. The given string will + be formatted using two field names: ``transformer_name`` and ``feature_name`` + e.g. ``"{feature_name}__{transformer_name}"``. See :meth:`str.format` method + from the standard ... + + + cat + + + 205907 +1159 +TESTa8a9af7f85sklearn.preprocessing._encoders.OneHotEncoder +sklearn.OneHotEncoder +sklearn.preprocessing._encoders.OneHotEncoder +1 +openml==0.16.0,sklearn==1.7.2 +Encode categorical features as a one-hot numeric array. + +The input to this transformer should be an array-like of integers or +strings, denoting the values taken on by categorical (discrete) features. +The features are encoded using a one-hot (aka 'one-of-K' or 'dummy') +encoding scheme. This creates a binary column for each category and +returns a sparse matrix or dense array (depending on the ``sparse_output`` +parameter). + +By default, the encoder derives the categories based on the unique values +in each feature. 
Alternatively, you can also specify the `categories` +manually. + +This encoding is needed for feeding categorical data to many scikit-learn +estimators, notably linear models and SVMs with the standard kernels. + +Note: a one-hot encoding of y labels should use a LabelBinarizer +instead. +2025-11-15T10:47:52 +English +sklearn==1.7.2 +numpy>=1.22.0 +scipy>=1.8.0 +joblib>=1.2.0 +threadpoolctl>=3.1.0 + + categories + 'auto' or a list of array + "auto" + Categories (unique values) per feature: + + - 'auto' : Determine categories automatically from the training data + - list : ``categories[i]`` holds the categories expected in the ith + column. The passed categories should not mix strings and numeric + values within a single feature, and should be sorted in case of + numeric values + + The used categories can be found in the ``categories_`` attribute + + .. versionadded:: 0.20 + +drop : {'first', 'if_binary'} or an array-like of shape (n_features,), default=None + Specifies a methodology to use to drop one of the categories per + feature. This is useful in situations where perfectly collinear + features cause problems, such as when feeding the resulting data + into an unregularized linear regression model + + However, dropping one category breaks the symmetry of the original + representation and can therefore induce a bias in downstream models, + for instance for penalized linear classification or regression models + + + drop + + null + + + + dtype + number type + {"oml-python:serialized_object": "type", "value": "np.float64"} + Desired dtype of output + +handle_unknown : {'error', 'ignore', 'infrequent_if_exist', 'warn'}, default='error' + Specifies the way unknown categories are handled during :meth:`transform` + + - 'error' : Raise an error if an unknown category is present during transform + - 'ignore' : When an unknown category is encountered during + transform, the resulting one-hot encoded columns for this feature + will be all zeros. In the inverse transform, an unknown category + will be denoted as None + - 'infrequent_if_exist' : When an unknown category is encountered + during transform, the resulting one-hot encoded columns for this + feature will map to the infrequent category if it exists. The + infrequent category will be mapped to the last position in the + encoding. During inverse transform, an unknown category will be + mapped to the category denoted `'infrequent'` if it exists. If the + `'infrequent'` category does not exist, then :meth:`transform` an... + + + feature_name_combiner + + "concat" + + + + handle_unknown + + "ignore" + + + + max_categories + int + null + Specifies an upper limit to the number of output features for each input + feature when considering infrequent categories. If there are infrequent + categories, `max_categories` includes the category representing the + infrequent categories along with the frequent categories. If `None`, + there is no limit to the number of output features + + .. versionadded:: 1.1 + Read more in the :ref:`User Guide <encoder_infrequent_categories>` + +feature_name_combiner : "concat" or callable, default="concat" + Callable with signature `def callable(input_feature, category)` that returns a + string. This is used to create feature names to be returned by + :meth:`get_feature_names_out` + + `"concat"` concatenates encoded feature name and category with + `feature + "_" + str(category)`.E.g. feature X with values 1, 6, 7 create + feature names `X_1, X_6, X_7` + + .. 
versionadded:: 1.3 + + + min_frequency + int or float + null + Specifies the minimum frequency below which a category will be + considered infrequent + + - If `int`, categories with a smaller cardinality will be considered + infrequent + + - If `float`, categories with a smaller cardinality than + `min_frequency * n_samples` will be considered infrequent + + .. versionadded:: 1.1 + Read more in the :ref:`User Guide <encoder_infrequent_categories>` + + + sparse_output + bool + true + When ``True``, it returns a :class:`scipy.sparse.csr_matrix`, + i.e. a sparse matrix in "Compressed Sparse Row" (CSR) format + + .. versionadded:: 1.2 + `sparse` was renamed to `sparse_output` + +openml-python +python +scikit-learn +sklearn +sklearn_1.7.2 + + +openml-python +python +scikit-learn +sklearn +sklearn_1.7.2 + + + + imp + + + 205908 +1159 +TESTa8a9af7f85sklearn.impute._base.SimpleImputer +sklearn.SimpleImputer +sklearn.impute._base.SimpleImputer +1 +openml==0.16.0,sklearn==1.7.2 +Univariate imputer for completing missing values with simple strategies. + +Replace missing values using a descriptive statistic (e.g. mean, median, or +most frequent) along each column, or using a constant value. +2025-11-15T10:47:52 +English +sklearn==1.7.2 +numpy>=1.22.0 +scipy>=1.8.0 +joblib>=1.2.0 +threadpoolctl>=3.1.0 + + add_indicator + bool + false + If True, a :class:`MissingIndicator` transform will stack onto output + of the imputer's transform. This allows a predictive estimator + to account for missingness despite imputation. If a feature has no + missing values at fit/train time, the feature won't appear on + the missing indicator even if there are missing values at + transform/test time + + + copy + bool + true + If True, a copy of X will be created. If False, imputation will + be done in-place whenever possible. Note that, in the following cases, + a new copy will always be made, even if `copy=False`: + + - If `X` is not an array of floating values; + - If `X` is encoded as a CSR matrix; + - If `add_indicator=True` + + + fill_value + str or numerical value + null + When strategy == "constant", `fill_value` is used to replace all + occurrences of missing_values. For string or object data types, + `fill_value` must be a string + If `None`, `fill_value` will be 0 when imputing numerical + data and "missing_value" for strings or object data types + + + keep_empty_features + bool + false + If True, features that consist exclusively of missing values when + `fit` is called are returned in results when `transform` is called + The imputed value is always `0` except when `strategy="constant"` + in which case `fill_value` will be used instead + + .. versionadded:: 1.2 + + .. versionchanged:: 1.6 + Currently, when `keep_empty_feature=False` and `strategy="constant"`, + empty features are not dropped. This behaviour will change in version + 1.8. Set `keep_empty_feature=True` to preserve this behaviour. + + + missing_values + int + NaN + The placeholder for the missing values. All occurrences of + `missing_values` will be imputed. For pandas' dataframes with + nullable integer dtypes with missing values, `missing_values` + can be set to either `np.nan` or `pd.NA` + + + strategy + str or Callable + "mean" + The imputation strategy + + - If "mean", then replace missing values using the mean along + each column. Can only be used with numeric data + - If "median", then replace missing values using the median along + each column. 
Can only be used with numeric data + - If "most_frequent", then replace missing using the most frequent + value along each column. Can be used with strings or numeric data + If there is more than one such value, only the smallest is returned + - If "constant", then replace missing values with fill_value. Can be + used with strings or numeric data + - If an instance of Callable, then replace missing values using the + scalar statistic returned by running the callable over a dense 1d + array containing non-missing values of each column + + .. versionadded:: 0.20 + strategy="constant" for fixed value imputation + + .. versionadded:: 1.5 + strategy=callable for custom value imputation + +openml-python +python +scikit-learn +sklearn +sklearn_1.7.2 + + + + classifier + + + 205909 +1159 +TESTa8a9af7f85sklearn.dummy.DummyClassifier +sklearn.DummyClassifier +sklearn.dummy.DummyClassifier +1 +openml==0.16.0,sklearn==1.7.2 +DummyClassifier makes predictions that ignore the input features. + +This classifier serves as a simple baseline to compare against other more +complex classifiers. + +The specific behavior of the baseline is selected with the `strategy` +parameter. + +All strategies make predictions that ignore the input feature values passed +as the `X` argument to `fit` and `predict`. The predictions, however, +typically depend on values observed in the `y` parameter passed to `fit`. + +Note that the "stratified" and "uniform" strategies lead to +non-deterministic predictions that can be rendered deterministic by setting +the `random_state` parameter if needed. The other strategies are naturally +deterministic and, once fit, always return the same constant prediction +for any value of `X`. +2025-11-15T10:47:52 +English +sklearn==1.7.2 +numpy>=1.22.0 +scipy>=1.8.0 +joblib>=1.2.0 +threadpoolctl>=3.1.0 + + constant + int or str or array + null + The explicit constant as predicted by the "constant" strategy. This + parameter is useful only for the "constant" strategy. 
+ + + random_state + int + null + Controls the randomness to generate the predictions when + ``strategy='stratified'`` or ``strategy='uniform'`` + Pass an int for reproducible output across multiple function calls + See :term:`Glossary <random_state>` + + + strategy + + "prior" + + +openml-python +python +scikit-learn +sklearn +sklearn_1.7.2 + + +openml-python +python +scikit-learn +sklearn +sklearn_1.7.2 + diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/locks/datasets.functions.get_dataset b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/locks/datasets.functions.get_dataset new file mode 100644 index 000000000..e69de29bb diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/locks/flows.functions.get_flow b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/locks/flows.functions.get_flow new file mode 100644 index 000000000..e69de29bb diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/locks/runs.functions.get_run b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/locks/runs.functions.get_run new file mode 100644 index 000000000..e69de29bb diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/locks/tasks.functions.get_task b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/locks/tasks.functions.get_task new file mode 100644 index 000000000..e69de29bb diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/tasks/119/datasplits.arff b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/tasks/119/datasplits.arff new file mode 100644 index 000000000..abd1e10eb --- /dev/null +++ b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/tasks/119/datasplits.arff @@ -0,0 +1,776 @@ +@relation diabetes_splits + +@attribute type {TRAIN,TEST} +@attribute rowid numeric +@attribute repeat numeric +@attribute fold numeric + +@data +TEST,53,0,0 +TEST,455,0,0 +TEST,101,0,0 +TEST,57,0,0 +TEST,363,0,0 +TEST,16,0,0 +TEST,496,0,0 +TEST,271,0,0 +TEST,511,0,0 +TEST,280,0,0 +TEST,88,0,0 +TEST,270,0,0 +TEST,210,0,0 +TEST,665,0,0 +TEST,156,0,0 +TEST,360,0,0 +TEST,323,0,0 +TEST,528,0,0 +TEST,113,0,0 +TEST,96,0,0 +TEST,107,0,0 +TEST,166,0,0 +TEST,413,0,0 +TEST,565,0,0 +TEST,251,0,0 +TEST,339,0,0 +TEST,317,0,0 +TEST,564,0,0 +TEST,707,0,0 +TEST,518,0,0 +TEST,513,0,0 +TEST,95,0,0 +TEST,371,0,0 +TEST,10,0,0 +TEST,242,0,0 +TEST,727,0,0 +TEST,755,0,0 +TEST,143,0,0 +TEST,763,0,0 +TEST,110,0,0 +TEST,562,0,0 +TEST,481,0,0 +TEST,126,0,0 +TEST,414,0,0 +TEST,258,0,0 +TEST,186,0,0 +TEST,256,0,0 +TEST,713,0,0 +TEST,254,0,0 +TEST,275,0,0 +TEST,197,0,0 +TEST,542,0,0 +TEST,4,0,0 +TEST,428,0,0 +TEST,387,0,0 +TEST,244,0,0 +TEST,265,0,0 +TEST,722,0,0 +TEST,58,0,0 +TEST,79,0,0 +TEST,161,0,0 +TEST,619,0,0 +TEST,349,0,0 +TEST,702,0,0 +TEST,756,0,0 +TEST,216,0,0 +TEST,396,0,0 +TEST,180,0,0 +TEST,489,0,0 +TEST,669,0,0 +TEST,567,0,0 +TEST,283,0,0 +TEST,624,0,0 +TEST,647,0,0 +TEST,310,0,0 +TEST,127,0,0 +TEST,142,0,0 +TEST,680,0,0 +TEST,499,0,0 +TEST,545,0,0 +TEST,8,0,0 +TEST,404,0,0 +TEST,698,0,0 +TEST,671,0,0 +TEST,644,0,0 +TEST,505,0,0 +TEST,135,0,0 +TEST,613,0,0 +TEST,469,0,0 +TEST,507,0,0 +TEST,80,0,0 +TEST,28,0,0 
+TEST,751,0,0 +TEST,337,0,0 +TEST,679,0,0 +TEST,348,0,0 +TEST,332,0,0 +TEST,120,0,0 +TEST,708,0,0 +TEST,429,0,0 +TEST,276,0,0 +TEST,534,0,0 +TEST,350,0,0 +TEST,610,0,0 +TEST,399,0,0 +TEST,516,0,0 +TEST,29,0,0 +TEST,559,0,0 +TEST,267,0,0 +TEST,192,0,0 +TEST,355,0,0 +TEST,451,0,0 +TEST,124,0,0 +TEST,392,0,0 +TEST,141,0,0 +TEST,64,0,0 +TEST,47,0,0 +TEST,20,0,0 +TEST,657,0,0 +TEST,325,0,0 +TEST,733,0,0 +TEST,288,0,0 +TEST,576,0,0 +TEST,302,0,0 +TEST,160,0,0 +TEST,227,0,0 +TEST,395,0,0 +TEST,400,0,0 +TEST,735,0,0 +TEST,705,0,0 +TEST,523,0,0 +TEST,290,0,0 +TEST,19,0,0 +TEST,485,0,0 +TEST,551,0,0 +TEST,571,0,0 +TEST,548,0,0 +TEST,645,0,0 +TEST,724,0,0 +TEST,703,0,0 +TEST,261,0,0 +TEST,488,0,0 +TEST,83,0,0 +TEST,51,0,0 +TEST,397,0,0 +TEST,720,0,0 +TEST,345,0,0 +TEST,655,0,0 +TEST,344,0,0 +TEST,449,0,0 +TEST,99,0,0 +TEST,762,0,0 +TEST,42,0,0 +TEST,578,0,0 +TEST,386,0,0 +TEST,108,0,0 +TEST,690,0,0 +TEST,31,0,0 +TEST,281,0,0 +TEST,201,0,0 +TEST,23,0,0 +TEST,699,0,0 +TEST,618,0,0 +TEST,379,0,0 +TEST,617,0,0 +TEST,585,0,0 +TEST,752,0,0 +TEST,693,0,0 +TEST,182,0,0 +TEST,11,0,0 +TEST,457,0,0 +TEST,477,0,0 +TEST,508,0,0 +TEST,685,0,0 +TEST,553,0,0 +TEST,358,0,0 +TEST,111,0,0 +TEST,581,0,0 +TEST,38,0,0 +TEST,35,0,0 +TEST,74,0,0 +TEST,324,0,0 +TEST,208,0,0 +TEST,361,0,0 +TEST,438,0,0 +TEST,486,0,0 +TEST,549,0,0 +TEST,666,0,0 +TEST,482,0,0 +TEST,173,0,0 +TEST,492,0,0 +TEST,279,0,0 +TEST,656,0,0 +TEST,580,0,0 +TEST,224,0,0 +TEST,639,0,0 +TEST,484,0,0 +TEST,36,0,0 +TEST,653,0,0 +TEST,506,0,0 +TEST,114,0,0 +TEST,130,0,0 +TEST,106,0,0 +TEST,341,0,0 +TEST,590,0,0 +TEST,321,0,0 +TEST,378,0,0 +TEST,730,0,0 +TEST,259,0,0 +TEST,638,0,0 +TEST,417,0,0 +TEST,425,0,0 +TEST,424,0,0 +TEST,697,0,0 +TEST,470,0,0 +TEST,204,0,0 +TEST,72,0,0 +TEST,568,0,0 +TEST,103,0,0 +TEST,230,0,0 +TEST,497,0,0 +TEST,441,0,0 +TEST,539,0,0 +TEST,533,0,0 +TEST,326,0,0 +TEST,90,0,0 +TEST,331,0,0 +TEST,311,0,0 +TEST,427,0,0 +TEST,416,0,0 +TEST,44,0,0 +TEST,131,0,0 +TEST,696,0,0 +TEST,448,0,0 +TEST,346,0,0 +TEST,134,0,0 +TEST,700,0,0 +TEST,359,0,0 +TEST,426,0,0 +TEST,75,0,0 +TEST,603,0,0 +TEST,689,0,0 +TEST,320,0,0 +TEST,194,0,0 +TEST,709,0,0 +TEST,398,0,0 +TEST,212,0,0 +TEST,343,0,0 +TEST,169,0,0 +TEST,695,0,0 +TEST,249,0,0 +TEST,676,0,0 +TEST,732,0,0 +TRAIN,611,0,0 +TRAIN,714,0,0 +TRAIN,27,0,0 +TRAIN,365,0,0 +TRAIN,164,0,0 +TRAIN,411,0,0 +TRAIN,94,0,0 +TRAIN,123,0,0 +TRAIN,243,0,0 +TRAIN,473,0,0 +TRAIN,466,0,0 +TRAIN,629,0,0 +TRAIN,59,0,0 +TRAIN,203,0,0 +TRAIN,563,0,0 +TRAIN,52,0,0 +TRAIN,370,0,0 +TRAIN,476,0,0 +TRAIN,701,0,0 +TRAIN,81,0,0 +TRAIN,226,0,0 +TRAIN,150,0,0 +TRAIN,437,0,0 +TRAIN,584,0,0 +TRAIN,181,0,0 +TRAIN,22,0,0 +TRAIN,741,0,0 +TRAIN,684,0,0 +TRAIN,596,0,0 +TRAIN,179,0,0 +TRAIN,636,0,0 +TRAIN,60,0,0 +TRAIN,495,0,0 +TRAIN,335,0,0 +TRAIN,297,0,0 +TRAIN,375,0,0 +TRAIN,601,0,0 +TRAIN,670,0,0 +TRAIN,504,0,0 +TRAIN,435,0,0 +TRAIN,479,0,0 +TRAIN,7,0,0 +TRAIN,445,0,0 +TRAIN,442,0,0 +TRAIN,278,0,0 +TRAIN,625,0,0 +TRAIN,40,0,0 +TRAIN,715,0,0 +TRAIN,667,0,0 +TRAIN,140,0,0 +TRAIN,9,0,0 +TRAIN,658,0,0 +TRAIN,419,0,0 +TRAIN,316,0,0 +TRAIN,459,0,0 +TRAIN,289,0,0 +TRAIN,409,0,0 +TRAIN,736,0,0 +TRAIN,623,0,0 +TRAIN,73,0,0 +TRAIN,649,0,0 +TRAIN,630,0,0 +TRAIN,637,0,0 +TRAIN,675,0,0 +TRAIN,456,0,0 +TRAIN,248,0,0 +TRAIN,384,0,0 +TRAIN,91,0,0 +TRAIN,719,0,0 +TRAIN,104,0,0 +TRAIN,175,0,0 +TRAIN,760,0,0 +TRAIN,468,0,0 +TRAIN,535,0,0 +TRAIN,514,0,0 +TRAIN,712,0,0 +TRAIN,367,0,0 +TRAIN,501,0,0 +TRAIN,643,0,0 +TRAIN,688,0,0 +TRAIN,728,0,0 +TRAIN,100,0,0 +TRAIN,115,0,0 +TRAIN,305,0,0 +TRAIN,446,0,0 +TRAIN,129,0,0 +TRAIN,615,0,0 +TRAIN,87,0,0 +TRAIN,462,0,0 
+TRAIN,515,0,0 +TRAIN,250,0,0 +TRAIN,557,0,0 +TRAIN,382,0,0 +TRAIN,33,0,0 +TRAIN,257,0,0 +TRAIN,620,0,0 +TRAIN,151,0,0 +TRAIN,102,0,0 +TRAIN,209,0,0 +TRAIN,232,0,0 +TRAIN,652,0,0 +TRAIN,205,0,0 +TRAIN,753,0,0 +TRAIN,319,0,0 +TRAIN,674,0,0 +TRAIN,12,0,0 +TRAIN,309,0,0 +TRAIN,146,0,0 +TRAIN,433,0,0 +TRAIN,569,0,0 +TRAIN,460,0,0 +TRAIN,329,0,0 +TRAIN,380,0,0 +TRAIN,660,0,0 +TRAIN,600,0,0 +TRAIN,537,0,0 +TRAIN,170,0,0 +TRAIN,725,0,0 +TRAIN,252,0,0 +TRAIN,595,0,0 +TRAIN,529,0,0 +TRAIN,391,0,0 +TRAIN,298,0,0 +TRAIN,766,0,0 +TRAIN,369,0,0 +TRAIN,664,0,0 +TRAIN,46,0,0 +TRAIN,662,0,0 +TRAIN,686,0,0 +TRAIN,55,0,0 +TRAIN,731,0,0 +TRAIN,472,0,0 +TRAIN,362,0,0 +TRAIN,461,0,0 +TRAIN,517,0,0 +TRAIN,218,0,0 +TRAIN,14,0,0 +TRAIN,597,0,0 +TRAIN,262,0,0 +TRAIN,579,0,0 +TRAIN,18,0,0 +TRAIN,614,0,0 +TRAIN,420,0,0 +TRAIN,268,0,0 +TRAIN,296,0,0 +TRAIN,710,0,0 +TRAIN,586,0,0 +TRAIN,500,0,0 +TRAIN,672,0,0 +TRAIN,3,0,0 +TRAIN,612,0,0 +TRAIN,530,0,0 +TRAIN,406,0,0 +TRAIN,554,0,0 +TRAIN,582,0,0 +TRAIN,313,0,0 +TRAIN,32,0,0 +TRAIN,682,0,0 +TRAIN,711,0,0 +TRAIN,494,0,0 +TRAIN,291,0,0 +TRAIN,157,0,0 +TRAIN,439,0,0 +TRAIN,412,0,0 +TRAIN,723,0,0 +TRAIN,185,0,0 +TRAIN,54,0,0 +TRAIN,372,0,0 +TRAIN,206,0,0 +TRAIN,207,0,0 +TRAIN,17,0,0 +TRAIN,93,0,0 +TRAIN,187,0,0 +TRAIN,13,0,0 +TRAIN,122,0,0 +TRAIN,198,0,0 +TRAIN,282,0,0 +TRAIN,651,0,0 +TRAIN,15,0,0 +TRAIN,294,0,0 +TRAIN,61,0,0 +TRAIN,550,0,0 +TRAIN,650,0,0 +TRAIN,503,0,0 +TRAIN,376,0,0 +TRAIN,415,0,0 +TRAIN,430,0,0 +TRAIN,632,0,0 +TRAIN,155,0,0 +TRAIN,721,0,0 +TRAIN,604,0,0 +TRAIN,758,0,0 +TRAIN,171,0,0 +TRAIN,334,0,0 +TRAIN,353,0,0 +TRAIN,631,0,0 +TRAIN,336,0,0 +TRAIN,1,0,0 +TRAIN,540,0,0 +TRAIN,737,0,0 +TRAIN,49,0,0 +TRAIN,223,0,0 +TRAIN,71,0,0 +TRAIN,729,0,0 +TRAIN,184,0,0 +TRAIN,21,0,0 +TRAIN,233,0,0 +TRAIN,408,0,0 +TRAIN,6,0,0 +TRAIN,132,0,0 +TRAIN,431,0,0 +TRAIN,524,0,0 +TRAIN,366,0,0 +TRAIN,67,0,0 +TRAIN,199,0,0 +TRAIN,147,0,0 +TRAIN,681,0,0 +TRAIN,663,0,0 +TRAIN,137,0,0 +TRAIN,538,0,0 +TRAIN,43,0,0 +TRAIN,167,0,0 +TRAIN,82,0,0 +TRAIN,602,0,0 +TRAIN,307,0,0 +TRAIN,340,0,0 +TRAIN,266,0,0 +TRAIN,318,0,0 +TRAIN,678,0,0 +TRAIN,552,0,0 +TRAIN,588,0,0 +TRAIN,178,0,0 +TRAIN,532,0,0 +TRAIN,152,0,0 +TRAIN,63,0,0 +TRAIN,593,0,0 +TRAIN,217,0,0 +TRAIN,607,0,0 +TRAIN,136,0,0 +TRAIN,622,0,0 +TRAIN,231,0,0 +TRAIN,191,0,0 +TRAIN,295,0,0 +TRAIN,97,0,0 +TRAIN,421,0,0 +TRAIN,333,0,0 +TRAIN,642,0,0 +TRAIN,677,0,0 +TRAIN,229,0,0 +TRAIN,543,0,0 +TRAIN,592,0,0 +TRAIN,583,0,0 +TRAIN,92,0,0 +TRAIN,634,0,0 +TRAIN,215,0,0 +TRAIN,159,0,0 +TRAIN,76,0,0 +TRAIN,541,0,0 +TRAIN,314,0,0 +TRAIN,546,0,0 +TRAIN,574,0,0 +TRAIN,168,0,0 +TRAIN,158,0,0 +TRAIN,434,0,0 +TRAIN,292,0,0 +TRAIN,86,0,0 +TRAIN,716,0,0 +TRAIN,526,0,0 +TRAIN,154,0,0 +TRAIN,743,0,0 +TRAIN,139,0,0 +TRAIN,691,0,0 +TRAIN,483,0,0 +TRAIN,444,0,0 +TRAIN,34,0,0 +TRAIN,407,0,0 +TRAIN,761,0,0 +TRAIN,606,0,0 +TRAIN,121,0,0 +TRAIN,628,0,0 +TRAIN,177,0,0 +TRAIN,465,0,0 +TRAIN,272,0,0 +TRAIN,172,0,0 +TRAIN,754,0,0 +TRAIN,410,0,0 +TRAIN,394,0,0 +TRAIN,133,0,0 +TRAIN,118,0,0 +TRAIN,474,0,0 +TRAIN,520,0,0 +TRAIN,405,0,0 +TRAIN,591,0,0 +TRAIN,452,0,0 +TRAIN,5,0,0 +TRAIN,742,0,0 +TRAIN,654,0,0 +TRAIN,646,0,0 +TRAIN,594,0,0 +TRAIN,641,0,0 +TRAIN,105,0,0 +TRAIN,138,0,0 +TRAIN,246,0,0 +TRAIN,125,0,0 +TRAIN,162,0,0 +TRAIN,299,0,0 +TRAIN,493,0,0 +TRAIN,85,0,0 +TRAIN,694,0,0 +TRAIN,418,0,0 +TRAIN,354,0,0 +TRAIN,745,0,0 +TRAIN,383,0,0 +TRAIN,263,0,0 +TRAIN,443,0,0 +TRAIN,234,0,0 +TRAIN,24,0,0 +TRAIN,673,0,0 +TRAIN,767,0,0 +TRAIN,747,0,0 +TRAIN,225,0,0 +TRAIN,749,0,0 +TRAIN,726,0,0 +TRAIN,37,0,0 +TRAIN,50,0,0 +TRAIN,364,0,0 +TRAIN,240,0,0 +TRAIN,381,0,0 
+TRAIN,487,0,0 +TRAIN,683,0,0 +TRAIN,144,0,0 +TRAIN,566,0,0 +TRAIN,56,0,0 +TRAIN,521,0,0 +TRAIN,401,0,0 +TRAIN,165,0,0 +TRAIN,202,0,0 +TRAIN,253,0,0 +TRAIN,211,0,0 +TRAIN,315,0,0 +TRAIN,25,0,0 +TRAIN,351,0,0 +TRAIN,510,0,0 +TRAIN,304,0,0 +TRAIN,68,0,0 +TRAIN,238,0,0 +TRAIN,41,0,0 +TRAIN,704,0,0 +TRAIN,237,0,0 +TRAIN,116,0,0 +TRAIN,183,0,0 +TRAIN,374,0,0 +TRAIN,390,0,0 +TRAIN,174,0,0 +TRAIN,475,0,0 +TRAIN,668,0,0 +TRAIN,570,0,0 +TRAIN,573,0,0 +TRAIN,220,0,0 +TRAIN,740,0,0 +TRAIN,30,0,0 +TRAIN,587,0,0 +TRAIN,153,0,0 +TRAIN,575,0,0 +TRAIN,190,0,0 +TRAIN,536,0,0 +TRAIN,235,0,0 +TRAIN,599,0,0 +TRAIN,26,0,0 +TRAIN,247,0,0 +TRAIN,193,0,0 +TRAIN,598,0,0 +TRAIN,527,0,0 +TRAIN,748,0,0 +TRAIN,241,0,0 +TRAIN,640,0,0 +TRAIN,274,0,0 +TRAIN,163,0,0 +TRAIN,45,0,0 +TRAIN,112,0,0 +TRAIN,65,0,0 +TRAIN,300,0,0 +TRAIN,453,0,0 +TRAIN,327,0,0 +TRAIN,368,0,0 +TRAIN,219,0,0 +TRAIN,718,0,0 +TRAIN,738,0,0 +TRAIN,436,0,0 +TRAIN,286,0,0 +TRAIN,322,0,0 +TRAIN,70,0,0 +TRAIN,62,0,0 +TRAIN,633,0,0 +TRAIN,744,0,0 +TRAIN,440,0,0 +TRAIN,490,0,0 +TRAIN,463,0,0 +TRAIN,98,0,0 +TRAIN,222,0,0 +TRAIN,385,0,0 +TRAIN,556,0,0 +TRAIN,764,0,0 +TRAIN,128,0,0 +TRAIN,522,0,0 +TRAIN,389,0,0 +TRAIN,648,0,0 +TRAIN,84,0,0 +TRAIN,509,0,0 +TRAIN,454,0,0 +TRAIN,687,0,0 +TRAIN,388,0,0 +TRAIN,347,0,0 +TRAIN,547,0,0 +TRAIN,706,0,0 +TRAIN,78,0,0 +TRAIN,255,0,0 +TRAIN,661,0,0 +TRAIN,69,0,0 +TRAIN,306,0,0 +TRAIN,284,0,0 +TRAIN,264,0,0 +TRAIN,188,0,0 +TRAIN,149,0,0 +TRAIN,717,0,0 +TRAIN,109,0,0 +TRAIN,221,0,0 +TRAIN,214,0,0 +TRAIN,635,0,0 +TRAIN,750,0,0 +TRAIN,117,0,0 +TRAIN,189,0,0 +TRAIN,555,0,0 +TRAIN,692,0,0 +TRAIN,627,0,0 +TRAIN,196,0,0 +TRAIN,145,0,0 +TRAIN,558,0,0 +TRAIN,621,0,0 +TRAIN,195,0,0 +TRAIN,342,0,0 +TRAIN,356,0,0 +TRAIN,377,0,0 +TRAIN,236,0,0 +TRAIN,338,0,0 +TRAIN,66,0,0 +TRAIN,458,0,0 +TRAIN,746,0,0 +TRAIN,403,0,0 +TRAIN,200,0,0 +TRAIN,480,0,0 +TRAIN,213,0,0 +TRAIN,77,0,0 +TRAIN,432,0,0 +TRAIN,239,0,0 +TRAIN,269,0,0 +TRAIN,423,0,0 +TRAIN,260,0,0 +TRAIN,560,0,0 +TRAIN,422,0,0 +TRAIN,39,0,0 +TRAIN,471,0,0 +TRAIN,328,0,0 +TRAIN,450,0,0 +TRAIN,393,0,0 +TRAIN,312,0,0 +TRAIN,739,0,0 +TRAIN,301,0,0 +TRAIN,608,0,0 +TRAIN,765,0,0 +TRAIN,502,0,0 +TRAIN,330,0,0 +TRAIN,734,0,0 +TRAIN,277,0,0 +TRAIN,616,0,0 +TRAIN,285,0,0 +TRAIN,148,0,0 +TRAIN,572,0,0 +TRAIN,757,0,0 +TRAIN,402,0,0 +TRAIN,464,0,0 +TRAIN,287,0,0 +TRAIN,357,0,0 +TRAIN,561,0,0 +TRAIN,605,0,0 +TRAIN,589,0,0 +TRAIN,352,0,0 +TRAIN,544,0,0 +TRAIN,293,0,0 +TRAIN,609,0,0 +TRAIN,308,0,0 +TRAIN,447,0,0 +TRAIN,478,0,0 +TRAIN,531,0,0 +TRAIN,659,0,0 +TRAIN,119,0,0 +TRAIN,491,0,0 +TRAIN,373,0,0 +TRAIN,228,0,0 +TRAIN,626,0,0 +TRAIN,303,0,0 +TRAIN,577,0,0 +TRAIN,245,0,0 +TRAIN,89,0,0 +TRAIN,273,0,0 +TRAIN,176,0,0 +TRAIN,519,0,0 +TRAIN,498,0,0 +TRAIN,467,0,0 +TRAIN,0,0,0 +TRAIN,759,0,0 +TRAIN,2,0,0 +TRAIN,525,0,0 +TRAIN,512,0,0 +TRAIN,48,0,0 \ No newline at end of file diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/tasks/119/datasplits.pkl.py3 b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/tasks/119/datasplits.pkl.py3 new file mode 100644 index 000000000..1090d01a3 Binary files /dev/null and b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/tasks/119/datasplits.pkl.py3 differ diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/tasks/119/task.xml b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/tasks/119/task.xml new file mode 100644 index 
000000000..534ac97ac --- /dev/null +++ b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/tasks/119/task.xml @@ -0,0 +1,36 @@ + + 119 + Task 119: diabetes (Supervised Classification) + 1 + Supervised Classification + + +20 +class + + + +5 +holdout +https://test.openml.org/api_splits/get/119/Task_119_splits.arff +1 + +33 +true + + + + + + + + + +ARFF + + + + + + + diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/tasks/733/task.xml b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/tasks/733/task.xml new file mode 100644 index 000000000..fd50a7108 --- /dev/null +++ b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/org/openml/test/tasks/733/task.xml @@ -0,0 +1,32 @@ + + 733 + Task 733: quake (Supervised Regression) + 2 + Supervised Regression + + +123 +richter + + + +7 +crossvalidation +https://test.openml.org//api_splits/get/733/Task_733_splits.arff +1 +10 + + + + + + + + +ARFF + + + + + + diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/runs/162880322156049587682907720944831787496/description.xml b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/runs/162880322156049587682907720944831787496/description.xml new file mode 100644 index 000000000..0113fa017 --- /dev/null +++ b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/runs/162880322156049587682907720944831787496/description.xml @@ -0,0 +1,34 @@ + + 119 + + Python_3.10.11. Sklearn_1.7.2. NumPy_2.2.6. SciPy_1.15.3. + + openml-python + Sklearn_1.7.2. + + + usercpu_time_millis_training + 0.0 + + + wall_clock_time_millis_training + 4.98652458190918 + + + usercpu_time_millis_testing + 0.0 + + + usercpu_time_millis + 0.0 + + + wall_clock_time_millis_testing + 2.9909610748291016 + + + wall_clock_time_millis + 7.977485656738281 + + + \ No newline at end of file diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/runs/162880322156049587682907720944831787496/flow.xml b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/runs/162880322156049587682907720944831787496/flow.xml new file mode 100644 index 000000000..b9369596a --- /dev/null +++ b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/runs/162880322156049587682907720944831787496/flow.xml @@ -0,0 +1,511 @@ + + TESTa8a9af7f85sklearn.pipeline.Pipeline(cat_handling=sklearn.compose._column_transformer.ColumnTransformer(cat=sklearn.preprocessing._encoders.OneHotEncoder),imp=sklearn.impute._base.SimpleImputer,classifier=sklearn.dummy.DummyClassifier) + sklearn.Pipeline(ColumnTransformer,SimpleImputer,DummyClassifier) + sklearn.pipeline.Pipeline + openml==0.16.0,sklearn==1.7.2 + A sequence of data transformers with an optional final predictor. + +`Pipeline` allows you to sequentially apply a list of transformers to +preprocess the data and, if desired, conclude the sequence with a final +:term:`predictor` for predictive modeling. + +Intermediate steps of the pipeline must be transformers, that is, they +must implement `fit` and `transform` methods. +The final :term:`estimator` only needs to implement `fit`. +The transformers in the pipeline can be cached using ``memory`` argument. + +The purpose of the pipeline is to assemble several steps that can be +cross-validated together while setting different parameters. 
For this, it +enables setting parameters of the various steps using their names and the +parameter name separated by a `'__'`, as in the example below. A step's +estimator may be replaced entirely by setting the parameter with its name +to another estimator, or a transformer removed by setting it to +`'passthrough'` or `None`. + +For an example use case of `Pipeline` combined with +:class:`~s... + English + sklearn==1.7.2 +numpy>=1.22.0 +scipy>=1.8.0 +joblib>=1.2.0 +threadpoolctl>=3.1.0 + + memory + str or object with the joblib + null + Used to cache the fitted transformers of the pipeline. The last step + will never be cached, even if it is a transformer. By default, no + caching is performed. If a string is given, it is the path to the + caching directory. Enabling caching triggers a clone of the transformers + before fitting. Therefore, the transformer instance given to the + pipeline cannot be inspected directly. Use the attribute ``named_steps`` + or ``steps`` to inspect estimators within the pipeline. Caching the + transformers is advantageous when fitting is time consuming. See + :ref:`sphx_glr_auto_examples_neighbors_plot_caching_nearest_neighbors.py` + for an example on how to enable caching + + + steps + list of tuples + [{"oml-python:serialized_object": "component_reference", "value": {"key": "cat_handling", "step_name": "cat_handling"}}, {"oml-python:serialized_object": "component_reference", "value": {"key": "imp", "step_name": "imp"}}, {"oml-python:serialized_object": "component_reference", "value": {"key": "classifier", "step_name": "classifier"}}] + List of (name of step, estimator) tuples that are to be chained in + sequential order. To be compatible with the scikit-learn API, all steps + must define `fit`. All non-last steps must also define `transform`. See + :ref:`Combining Estimators <combining_estimators>` for more details + + + transform_input + list of str + null + The names of the :term:`metadata` parameters that should be transformed by the + pipeline before passing it to the step consuming it + + This enables transforming some input arguments to ``fit`` (other than ``X``) + to be transformed by the steps of the pipeline up to the step which requires + them. Requirement is defined via :ref:`metadata routing <metadata_routing>` + For instance, this can be used to pass a validation set through the pipeline + + You can only set this if metadata routing is enabled, which you + can enable using ``sklearn.set_config(enable_metadata_routing=True)`` + + .. versionadded:: 1.6 + + + verbose + bool + false + If True, the time elapsed while fitting each step will be printed as it + is completed. + + + cat_handling + + TESTa8a9af7f85sklearn.compose._column_transformer.ColumnTransformer(cat=sklearn.preprocessing._encoders.OneHotEncoder) + sklearn.ColumnTransformer + sklearn.compose._column_transformer.ColumnTransformer + openml==0.16.0,sklearn==1.7.2 + Applies transformers to columns of an array or pandas DataFrame. + +This estimator allows different columns or column subsets of the input +to be transformed separately and the features generated by each transformer +will be concatenated to form a single feature space. +This is useful for heterogeneous or columnar data, to combine several +feature extraction mechanisms or transformations into a single transformer. + English + sklearn==1.7.2 +numpy>=1.22.0 +scipy>=1.8.0 +joblib>=1.2.0 +threadpoolctl>=3.1.0 + + force_int_remainder_cols + bool + "deprecated" + This parameter has no effect + + .. 
note:: + If you do not access the list of columns for the remainder columns + in the `transformers_` fitted attribute, you do not need to set + this parameter + + .. versionadded:: 1.5 + + .. versionchanged:: 1.7 + The default value for `force_int_remainder_cols` will change from + `True` to `False` in version 1.7 + + .. deprecated:: 1.7 + `force_int_remainder_cols` is deprecated and will be removed in 1.9. + + + n_jobs + int + null + Number of jobs to run in parallel + ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context + ``-1`` means using all processors. See :term:`Glossary <n_jobs>` + for more details + + + remainder + "passthrough" + + + sparse_threshold + float + 0.3 + If the output of the different transformers contains sparse matrices, + these will be stacked as a sparse matrix if the overall density is + lower than this value. Use ``sparse_threshold=0`` to always return + dense. When the transformed output consists of all dense data, the + stacked result will be dense, and this keyword will be ignored + + + transformer_weights + dict + null + Multiplicative weights for features per transformer. The output of the + transformer is multiplied by these weights. Keys are transformer names, + values the weights + + + transformers + list of tuples + [{"oml-python:serialized_object": "component_reference", "value": {"key": "cat", "step_name": "cat", "argument_1": {"oml-python:serialized_object": "function", "value": "tests.test_runs.test_run._cat_col_selector"}}}] + List of (name, transformer, columns) tuples specifying the + transformer objects to be applied to subsets of the data + + + verbose + bool + false + If True, the time elapsed while fitting each transformer will be + printed as it is completed + + + verbose_feature_names_out + bool + true + - If True, :meth:`ColumnTransformer.get_feature_names_out` will prefix + all feature names with the name of the transformer that generated that + feature. It is equivalent to setting + `verbose_feature_names_out="{transformer_name}__{feature_name}"` + - If False, :meth:`ColumnTransformer.get_feature_names_out` will not + prefix any feature names and will error if feature names are not + unique + - If ``Callable[[str, str], str]``, + :meth:`ColumnTransformer.get_feature_names_out` will rename all the features + using the name of the transformer. The first argument of the callable is the + transformer name and the second argument is the feature name. The returned + string will be the new feature name + - If ``str``, it must be a string ready for formatting. The given string will + be formatted using two field names: ``transformer_name`` and ``feature_name`` + e.g. ``"{feature_name}__{transformer_name}"``. See :meth:`str.format` method + from the standard ... + + + cat + + TESTa8a9af7f85sklearn.preprocessing._encoders.OneHotEncoder + sklearn.OneHotEncoder + sklearn.preprocessing._encoders.OneHotEncoder + openml==0.16.0,sklearn==1.7.2 + Encode categorical features as a one-hot numeric array. + +The input to this transformer should be an array-like of integers or +strings, denoting the values taken on by categorical (discrete) features. +The features are encoded using a one-hot (aka 'one-of-K' or 'dummy') +encoding scheme. This creates a binary column for each category and +returns a sparse matrix or dense array (depending on the ``sparse_output`` +parameter). + +By default, the encoder derives the categories based on the unique values +in each feature. Alternatively, you can also specify the `categories` +manually. 
+ +This encoding is needed for feeding categorical data to many scikit-learn +estimators, notably linear models and SVMs with the standard kernels. + +Note: a one-hot encoding of y labels should use a LabelBinarizer +instead. + English + sklearn==1.7.2 +numpy>=1.22.0 +scipy>=1.8.0 +joblib>=1.2.0 +threadpoolctl>=3.1.0 + + categories + 'auto' or a list of array + "auto" + Categories (unique values) per feature: + + - 'auto' : Determine categories automatically from the training data + - list : ``categories[i]`` holds the categories expected in the ith + column. The passed categories should not mix strings and numeric + values within a single feature, and should be sorted in case of + numeric values + + The used categories can be found in the ``categories_`` attribute + + .. versionadded:: 0.20 + +drop : {'first', 'if_binary'} or an array-like of shape (n_features,), default=None + Specifies a methodology to use to drop one of the categories per + feature. This is useful in situations where perfectly collinear + features cause problems, such as when feeding the resulting data + into an unregularized linear regression model + + However, dropping one category breaks the symmetry of the original + representation and can therefore induce a bias in downstream models, + for instance for penalized linear classification or regression models + + + drop + null + + + dtype + number type + {"oml-python:serialized_object": "type", "value": "np.float64"} + Desired dtype of output + +handle_unknown : {'error', 'ignore', 'infrequent_if_exist', 'warn'}, default='error' + Specifies the way unknown categories are handled during :meth:`transform` + + - 'error' : Raise an error if an unknown category is present during transform + - 'ignore' : When an unknown category is encountered during + transform, the resulting one-hot encoded columns for this feature + will be all zeros. In the inverse transform, an unknown category + will be denoted as None + - 'infrequent_if_exist' : When an unknown category is encountered + during transform, the resulting one-hot encoded columns for this + feature will map to the infrequent category if it exists. The + infrequent category will be mapped to the last position in the + encoding. During inverse transform, an unknown category will be + mapped to the category denoted `'infrequent'` if it exists. If the + `'infrequent'` category does not exist, then :meth:`transform` an... + + + feature_name_combiner + "concat" + + + handle_unknown + "ignore" + + + max_categories + int + null + Specifies an upper limit to the number of output features for each input + feature when considering infrequent categories. If there are infrequent + categories, `max_categories` includes the category representing the + infrequent categories along with the frequent categories. If `None`, + there is no limit to the number of output features + + .. versionadded:: 1.1 + Read more in the :ref:`User Guide <encoder_infrequent_categories>` + +feature_name_combiner : "concat" or callable, default="concat" + Callable with signature `def callable(input_feature, category)` that returns a + string. This is used to create feature names to be returned by + :meth:`get_feature_names_out` + + `"concat"` concatenates encoded feature name and category with + `feature + "_" + str(category)`.E.g. feature X with values 1, 6, 7 create + feature names `X_1, X_6, X_7` + + .. 
versionadded:: 1.3 + + + min_frequency + int or float + null + Specifies the minimum frequency below which a category will be + considered infrequent + + - If `int`, categories with a smaller cardinality will be considered + infrequent + + - If `float`, categories with a smaller cardinality than + `min_frequency * n_samples` will be considered infrequent + + .. versionadded:: 1.1 + Read more in the :ref:`User Guide <encoder_infrequent_categories>` + + + sparse_output + bool + true + When ``True``, it returns a :class:`scipy.sparse.csr_matrix`, + i.e. a sparse matrix in "Compressed Sparse Row" (CSR) format + + .. versionadded:: 1.2 + `sparse` was renamed to `sparse_output` + + openml-python + sklearn + scikit-learn + python + sklearn_1.7.2 + + + openml-python + sklearn + scikit-learn + python + sklearn_1.7.2 + + + + imp + + TESTa8a9af7f85sklearn.impute._base.SimpleImputer + sklearn.SimpleImputer + sklearn.impute._base.SimpleImputer + openml==0.16.0,sklearn==1.7.2 + Univariate imputer for completing missing values with simple strategies. + +Replace missing values using a descriptive statistic (e.g. mean, median, or +most frequent) along each column, or using a constant value. + English + sklearn==1.7.2 +numpy>=1.22.0 +scipy>=1.8.0 +joblib>=1.2.0 +threadpoolctl>=3.1.0 + + add_indicator + bool + false + If True, a :class:`MissingIndicator` transform will stack onto output + of the imputer's transform. This allows a predictive estimator + to account for missingness despite imputation. If a feature has no + missing values at fit/train time, the feature won't appear on + the missing indicator even if there are missing values at + transform/test time + + + copy + bool + true + If True, a copy of X will be created. If False, imputation will + be done in-place whenever possible. Note that, in the following cases, + a new copy will always be made, even if `copy=False`: + + - If `X` is not an array of floating values; + - If `X` is encoded as a CSR matrix; + - If `add_indicator=True` + + + fill_value + str or numerical value + null + When strategy == "constant", `fill_value` is used to replace all + occurrences of missing_values. For string or object data types, + `fill_value` must be a string + If `None`, `fill_value` will be 0 when imputing numerical + data and "missing_value" for strings or object data types + + + keep_empty_features + bool + false + If True, features that consist exclusively of missing values when + `fit` is called are returned in results when `transform` is called + The imputed value is always `0` except when `strategy="constant"` + in which case `fill_value` will be used instead + + .. versionadded:: 1.2 + + .. versionchanged:: 1.6 + Currently, when `keep_empty_feature=False` and `strategy="constant"`, + empty features are not dropped. This behaviour will change in version + 1.8. Set `keep_empty_feature=True` to preserve this behaviour. + + + missing_values + int + NaN + The placeholder for the missing values. All occurrences of + `missing_values` will be imputed. For pandas' dataframes with + nullable integer dtypes with missing values, `missing_values` + can be set to either `np.nan` or `pd.NA` + + + strategy + str or Callable + "mean" + The imputation strategy + + - If "mean", then replace missing values using the mean along + each column. Can only be used with numeric data + - If "median", then replace missing values using the median along + each column. 
Can only be used with numeric data + - If "most_frequent", then replace missing using the most frequent + value along each column. Can be used with strings or numeric data + If there is more than one such value, only the smallest is returned + - If "constant", then replace missing values with fill_value. Can be + used with strings or numeric data + - If an instance of Callable, then replace missing values using the + scalar statistic returned by running the callable over a dense 1d + array containing non-missing values of each column + + .. versionadded:: 0.20 + strategy="constant" for fixed value imputation + + .. versionadded:: 1.5 + strategy=callable for custom value imputation + + openml-python + sklearn + scikit-learn + python + sklearn_1.7.2 + + + + classifier + + TESTa8a9af7f85sklearn.dummy.DummyClassifier + sklearn.DummyClassifier + sklearn.dummy.DummyClassifier + openml==0.16.0,sklearn==1.7.2 + DummyClassifier makes predictions that ignore the input features. + +This classifier serves as a simple baseline to compare against other more +complex classifiers. + +The specific behavior of the baseline is selected with the `strategy` +parameter. + +All strategies make predictions that ignore the input feature values passed +as the `X` argument to `fit` and `predict`. The predictions, however, +typically depend on values observed in the `y` parameter passed to `fit`. + +Note that the "stratified" and "uniform" strategies lead to +non-deterministic predictions that can be rendered deterministic by setting +the `random_state` parameter if needed. The other strategies are naturally +deterministic and, once fit, always return the same constant prediction +for any value of `X`. + English + sklearn==1.7.2 +numpy>=1.22.0 +scipy>=1.8.0 +joblib>=1.2.0 +threadpoolctl>=3.1.0 + + constant + int or str or array + null + The explicit constant as predicted by the "constant" strategy. This + parameter is useful only for the "constant" strategy. + + + random_state + int + null + Controls the randomness to generate the predictions when + ``strategy='stratified'`` or ``strategy='uniform'`` + Pass an int for reproducible output across multiple function calls + See :term:`Glossary <random_state>` + + + strategy + "prior" + + openml-python + sklearn + scikit-learn + python + sklearn_1.7.2 + + + openml-python + sklearn + scikit-learn + python + sklearn_1.7.2 + \ No newline at end of file diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/runs/162880322156049587682907720944831787496/model.pkl b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/runs/162880322156049587682907720944831787496/model.pkl new file mode 100644 index 000000000..9d86f24ed Binary files /dev/null and b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/runs/162880322156049587682907720944831787496/model.pkl differ diff --git a/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/runs/162880322156049587682907720944831787496/predictions.arff b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/runs/162880322156049587682907720944831787496/predictions.arff new file mode 100644 index 000000000..259a8376d --- /dev/null +++ b/openml/tests.test_runs.test_run.TestRun.test_offline_and_online_run_identical/runs/162880322156049587682907720944831787496/predictions.arff @@ -0,0 +1,271 @@ +% Python_3.10.11. +% Sklearn_1.7.2. +% NumPy_2.2.6. +% SciPy_1.15.3. 
+% Sat Nov 15 15:47:38 2025 +% Created by run_flow_on_task +@RELATION openml_task_119_predictions + +@ATTRIBUTE repeat NUMERIC +@ATTRIBUTE fold NUMERIC +@ATTRIBUTE sample NUMERIC +@ATTRIBUTE row_id NUMERIC +@ATTRIBUTE prediction {tested_negative, tested_positive} +@ATTRIBUTE correct {tested_negative, tested_positive} +@ATTRIBUTE confidence.tested_negative NUMERIC +@ATTRIBUTE confidence.tested_positive NUMERIC + +@DATA +0,0,0,53,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,455,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,101,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,57,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,363,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,16,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,496,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,271,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,511,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,280,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,88,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,270,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,210,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,665,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,156,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,360,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,323,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,528,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,113,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,96,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,107,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,166,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,413,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,565,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,251,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,339,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,317,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,564,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,707,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,518,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,513,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,95,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,371,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,10,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,242,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,727,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,755,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,143,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,763,tested_negative,tested_negative,0.658252427184466,0.341747572815534 
+0,0,0,110,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,562,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,481,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,126,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,414,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,258,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,186,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,256,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,713,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,254,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,275,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,197,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,542,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,4,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,428,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,387,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,244,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,265,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,722,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,58,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,79,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,161,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,619,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,349,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,702,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,756,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,216,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,396,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,180,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,489,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,669,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,567,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,283,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,624,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,647,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,310,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,127,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,142,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,680,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,499,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,545,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,8,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,404,tested_negative,tested_positive,0.658252427184466,0.341747572815534 +0,0,0,698,tested_negative,tested_negative,0.658252427184466,0.341747572815534 +0,0,0,671,tested_negative,tested_negative,0.658252427184466,0.341747572815534 
+0,0,0,644,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,505,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,135,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,613,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,469,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,507,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,80,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,28,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,751,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,337,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,679,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,348,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,332,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,120,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,708,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,429,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,276,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,534,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,350,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,610,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,399,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,516,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,29,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,559,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,267,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,192,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,355,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,451,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,124,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,392,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,141,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,64,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,47,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,20,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,657,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,325,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,733,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,288,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,576,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,302,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,160,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,227,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,395,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,400,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,735,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,705,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,523,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,290,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,19,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,485,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,551,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,571,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,548,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,645,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,724,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,703,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,261,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,488,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,83,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,51,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,397,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,720,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,345,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,655,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,344,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,449,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,99,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,762,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,42,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,578,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,386,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,108,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,690,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,31,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,281,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,201,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,23,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,699,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,618,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,379,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,617,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,585,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,752,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,693,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,182,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,11,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,457,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,477,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,508,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,685,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,553,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,358,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,111,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,581,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,38,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,35,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,74,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,324,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,208,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,361,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,438,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,486,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,549,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,666,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,482,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,173,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,492,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,279,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,656,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,580,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,224,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,639,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,484,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,36,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,653,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,506,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,114,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,130,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,106,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,341,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,590,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,321,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,378,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,730,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,259,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,638,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,417,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,425,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,424,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,697,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,470,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,204,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,72,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,568,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,103,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,230,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,497,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,441,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,539,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,533,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,326,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,90,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,331,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,311,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,427,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,416,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,44,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,131,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,696,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,448,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,346,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,134,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,700,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,359,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,426,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,75,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,603,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,689,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,320,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,194,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,709,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,398,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,212,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,343,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,169,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,695,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,249,tested_negative,tested_negative,0.658252427184466,0.341747572815534
+0,0,0,676,tested_negative,tested_positive,0.658252427184466,0.341747572815534
+0,0,0,732,tested_negative,tested_positive,0.658252427184466,0.341747572815534
diff --git a/tests/files/misc/features_with_whitespaces.xml.pkl b/tests/files/misc/features_with_whitespaces.xml.pkl
new file mode 100644
index 000000000..f6a775cc7
Binary files /dev/null and b/tests/files/misc/features_with_whitespaces.xml.pkl differ
diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py
index 86a4d3f57..b1a24b540 100644
--- a/tests/test_datasets/test_dataset.py
+++ b/tests/test_datasets/test_dataset.py
@@ -230,6 +230,16 @@ def test_get_data_corrupt_pickle(self):
         assert isinstance(xy, pd.DataFrame)
         assert xy.shape == (150, 5)
 
+    def test_get_missing_summary(self):
+        dataset = openml.datasets.get_dataset(31)  # credit-g
+        summary = dataset.get_missing_summary()
+
+        assert "n_missing_total" in summary
+        assert "missing_per_column" in summary
+        assert isinstance(summary["missing_per_column"], dict)
+        assert isinstance(summary["n_missing_total"], (int, np.integer))
+        assert summary["n_missing_total"] >= 0
+
     def test_lazy_loading_metadata(self):
         # Initial Setup
         did_cache_dir = openml.utils._create_cache_directory_for_id(