
Commit 021bbdf

DOC make example works with latest scikit-learn (#951)

1 parent 66e3cf1 · commit 021bbdf

15 files changed: 62 additions, 34 deletions

doc/conf.py
Lines changed: 0 additions & 5 deletions

@@ -82,11 +82,6 @@
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = "sphinx"
 
-# -- Options for math equations -----------------------------------------------
-
-extensions.append("sphinx.ext.imgmath")
-imgmath_image_format = "svg"
-
 # -- Options for HTML output ----------------------------------------------
 
 # The theme to use for HTML and HTML Help pages. See the documentation for

doc/whats_new/v0.10.rst
Lines changed: 14 additions & 0 deletions

@@ -6,6 +6,12 @@ Version 0.10.0 (ongoing)
 Changelog
 ---------
 
+Compatibility
+.............
+
+- Maintenance release to be compatible with scikit-learn >= 1.0.2.
+  :pr:`946`, :pr:`947`, :pr:`949` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 Deprecation
 ...........
 
@@ -19,6 +25,14 @@ Deprecation
   estimator where `n_jobs` is set.
   :pr:`887` by :user:`Guillaume Lemaitre <glemaitre>`.
 
+- The parameter `base_estimator` is deprecated and will be removed in version
+  0.12. It impacts the following classes:
+  :class:`~imblearn.ensemble.BalancedBaggingClassifier`,
+  :class:`~imblearn.ensemble.EasyEnsembleClassifier`,
+  :class:`~imblearn.ensemble.RUSBoostClassifier`.
+  :pr:`946` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+
 Enhancements
 ............
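For context, a minimal sketch of the rename described above (not part of the commit; the dataset and sub-estimator here are arbitrary illustrations). The affected ensembles now receive the sub-estimator via `estimator`:

    from sklearn.datasets import make_classification
    from sklearn.tree import DecisionTreeClassifier

    from imblearn.ensemble import BalancedBaggingClassifier

    X, y = make_classification(weights=[0.9, 0.1], random_state=0)

    # `estimator` replaces the deprecated `base_estimator` keyword.
    clf = BalancedBaggingClassifier(
        estimator=DecisionTreeClassifier(), n_estimators=10, random_state=0
    ).fit(X, y)

Passing `base_estimator` still works in 0.10 but warns until its removal in 0.12.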

examples/api/plot_sampling_strategy_usage.py
Lines changed: 3 additions & 3 deletions

@@ -53,8 +53,8 @@
 # --------------------------------
 #
 # `sampling_strategy` can be given a `float`. For **under-sampling
-# methods**, it corresponds to the ratio :math:`\\alpha_{us}` defined by
-# :math:`N_{rM} = \\alpha_{us} \\times N_{m}` where :math:`N_{rM}` and
+# methods**, it corresponds to the ratio :math:`\alpha_{us}` defined by
+# :math:`N_{rM} = \alpha_{us} \times N_{m}` where :math:`N_{rM}` and
 # :math:`N_{m}` are the number of samples in the majority class after
 # resampling and the number of samples in the minority class, respectively.
 
@@ -77,7 +77,7 @@
 
 # %% [markdown]
 # For **over-sampling methods**, it correspond to the ratio
-# :math:`\\alpha_{os}` defined by :math:`N_{rm} = \\alpha_{os} \\times N_{M}`
+# :math:`\alpha_{os}` defined by :math:`N_{rm} = \alpha_{os} \times N_{M}`
 # where :math:`N_{rm}` and :math:`N_{M}` are the number of samples in the
 # minority class after resampling and the number of samples in the majority
 # class, respectively.
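A quick numeric sketch of how these ratios behave in practice (added for clarity, not part of the commit; the toy dataset is illustrative):

    from collections import Counter

    from sklearn.datasets import make_classification

    from imblearn.over_sampling import RandomOverSampler
    from imblearn.under_sampling import RandomUnderSampler

    X, y = make_classification(n_samples=1000, weights=[0.9, 0.1], random_state=0)
    print(Counter(y))  # roughly 900 majority / 100 minority samples

    # Under-sampling with sampling_strategy=0.5: the majority class is reduced
    # until the minority/majority ratio after resampling is 0.5 (about twice
    # the minority count remains in the majority class).
    _, y_us = RandomUnderSampler(sampling_strategy=0.5).fit_resample(X, y)
    print(Counter(y_us))

    # Over-sampling with sampling_strategy=0.5: the minority class is grown to
    # about half the majority size; the majority class is untouched.
    _, y_os = RandomOverSampler(sampling_strategy=0.5).fit_resample(X, y)
    print(Counter(y_os))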

examples/applications/plot_impact_imbalanced_classes.py
Lines changed: 1 addition & 1 deletion

@@ -345,7 +345,7 @@
 bag_clf = make_pipeline(
     preprocessor_tree,
     BalancedBaggingClassifier(
-        base_estimator=HistGradientBoostingClassifier(random_state=42),
+        estimator=HistGradientBoostingClassifier(random_state=42),
         n_estimators=10,
         random_state=42,
         n_jobs=2,

examples/applications/plot_over_sampling_benchmark_lfw.py
Lines changed: 1 addition & 1 deletion

@@ -74,7 +74,6 @@
 
 from sklearn.neighbors import KNeighborsClassifier
 
-# %%
 from imblearn import FunctionSampler
 from imblearn.over_sampling import ADASYN, SMOTE, RandomOverSampler
 from imblearn.pipeline import make_pipeline

@@ -145,6 +144,7 @@
 ax.set_xlim([0, 1])
 ax.set_ylim([0, 1])
 sns.despine(offset=10, ax=ax)
+plt.legend(loc="lower right", fontsize=16)
 plt.tight_layout()
 plt.show()

examples/ensemble/plot_comparison_ensemble_classifier.py
Lines changed: 20 additions & 14 deletions

@@ -67,21 +67,21 @@
 
 # %%
 import seaborn as sns
-from sklearn.metrics import plot_confusion_matrix
+from sklearn.metrics import ConfusionMatrixDisplay
 
 sns.set_context("poster")
 
-disp = plot_confusion_matrix(tree, X_test, y_test, colorbar=False)
+disp = ConfusionMatrixDisplay.from_estimator(tree, X_test, y_test, colorbar=False)
 _ = disp.ax_.set_title("Decision tree")
 
 # %% [markdown]
 # Classification using bagging classifier with and without sampling
 # -----------------------------------------------------------------
 #
-# Instead of using a single tree, we will check if an ensemble of decsion tree
+# Instead of using a single tree, we will check if an ensemble of decision tree
 # can actually alleviate the issue induced by the class imbalancing. First, we
 # will use a bagging classifier and its counter part which internally uses a
-# random under-sampling to balanced each boostrap sample.
+# random under-sampling to balanced each bootstrap sample.
 
 # %%
 from sklearn.ensemble import BaggingClassifier

@@ -117,10 +117,14 @@
 import matplotlib.pyplot as plt
 
 fig, axs = plt.subplots(ncols=2, figsize=(10, 5))
-plot_confusion_matrix(bagging, X_test, y_test, ax=axs[0], colorbar=False)
+ConfusionMatrixDisplay.from_estimator(
+    bagging, X_test, y_test, ax=axs[0], colorbar=False
+)
 axs[0].set_title("Bagging")
 
-plot_confusion_matrix(balanced_bagging, X_test, y_test, ax=axs[1], colorbar=False)
+ConfusionMatrixDisplay.from_estimator(
+    balanced_bagging, X_test, y_test, ax=axs[1], colorbar=False
+)
 axs[1].set_title("Balanced Bagging")
 
 fig.tight_layout()

@@ -150,7 +154,7 @@
 # %% [markdown]
 # Similarly to the previous experiment, the balanced classifier outperform the
 # classifier which learn from imbalanced bootstrap samples. In addition, random
-# forest outsperforms the bagging classifier.
+# forest outperforms the bagging classifier.
 
 # %%
 print("Random Forest classifier performance:")

@@ -166,10 +170,10 @@
 
 # %%
 fig, axs = plt.subplots(ncols=2, figsize=(10, 5))
-plot_confusion_matrix(rf, X_test, y_test, ax=axs[0], colorbar=False)
+ConfusionMatrixDisplay.from_estimator(rf, X_test, y_test, ax=axs[0], colorbar=False)
 axs[0].set_title("Random forest")
 
-plot_confusion_matrix(brf, X_test, y_test, ax=axs[1], colorbar=False)
+ConfusionMatrixDisplay.from_estimator(brf, X_test, y_test, ax=axs[1], colorbar=False)
 axs[1].set_title("Balanced random forest")
 
 fig.tight_layout()

@@ -187,12 +191,12 @@
 
 from imblearn.ensemble import EasyEnsembleClassifier, RUSBoostClassifier
 
-base_estimator = AdaBoostClassifier(n_estimators=10)
-eec = EasyEnsembleClassifier(n_estimators=10, base_estimator=base_estimator)
+estimator = AdaBoostClassifier(n_estimators=10)
+eec = EasyEnsembleClassifier(n_estimators=10, estimator=estimator)
 eec.fit(X_train, y_train)
 y_pred_eec = eec.predict(X_test)
 
-rusboost = RUSBoostClassifier(n_estimators=10, base_estimator=base_estimator)
+rusboost = RUSBoostClassifier(n_estimators=10, estimator=estimator)
 rusboost.fit(X_train, y_train)
 y_pred_rusboost = rusboost.predict(X_test)

@@ -211,9 +215,11 @@
 # %%
 fig, axs = plt.subplots(ncols=2, figsize=(10, 5))
 
-plot_confusion_matrix(eec, X_test, y_test, ax=axs[0], colorbar=False)
+ConfusionMatrixDisplay.from_estimator(eec, X_test, y_test, ax=axs[0], colorbar=False)
 axs[0].set_title("Easy Ensemble")
-plot_confusion_matrix(rusboost, X_test, y_test, ax=axs[1], colorbar=False)
+ConfusionMatrixDisplay.from_estimator(
+    rusboost, X_test, y_test, ax=axs[1], colorbar=False
+)
 axs[1].set_title("RUSBoost classifier")
 
 fig.tight_layout()
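The recurring substitution in this file tracks a scikit-learn API change: `plot_confusion_matrix` was deprecated in scikit-learn 1.0 and removed in 1.2 in favor of `ConfusionMatrixDisplay.from_estimator`. A minimal sketch of the new call (illustrative; `clf`, `X_test`, and `y_test` stand for any fitted classifier and held-out test set):

    import matplotlib.pyplot as plt
    from sklearn.metrics import ConfusionMatrixDisplay

    # Replaces: plot_confusion_matrix(clf, X_test, y_test, colorbar=False)
    disp = ConfusionMatrixDisplay.from_estimator(clf, X_test, y_test, colorbar=False)
    disp.ax_.set_title("Confusion matrix")
    plt.show()

`ConfusionMatrixDisplay.from_predictions(y_test, y_pred)` is the analogous entry point when only precomputed predictions are available.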

examples/evaluation/plot_classification_report.py
Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@
 =============================================
 
 Specific metrics have been developed to evaluate classifier which has been
-trained using imbalanced data. "mod:`imblearn` provides a classification report
+trained using imbalanced data. :mod:`imblearn` provides a classification report
 similar to :mod:`sklearn`, with additional metrics specific to imbalanced
 learning problem.
 """

examples/model_selection/plot_validation_curve.py
Lines changed: 3 additions & 3 deletions

@@ -93,7 +93,7 @@
 # %%
 import matplotlib.pyplot as plt
 
-fig, ax = plt.subplots(figsize=(7, 5))
+fig, ax = plt.subplots(figsize=(7, 7))
 ax.plot(param_range, test_scores_mean, label="SMOTE")
 ax.fill_between(
     param_range,

@@ -111,13 +111,13 @@
 )
 
 fig.suptitle("Validation Curve with SMOTE-CART")
-ax.set_xlabel("k_neighbors")
+ax.set_xlabel("Number of neighbors")
 ax.set_ylabel("Cohen's kappa")
 
 # make nice plotting
 sns.despine(ax=ax, offset=10)
 ax.set_xlim([1, 10])
 ax.set_ylim([0.4, 0.8])
-ax.legend(loc="lower right")
+ax.legend(loc="lower right", fontsize=16)
 plt.tight_layout()
 plt.show()

examples/over-sampling/plot_comparison_over_sampling.py
Lines changed: 5 additions & 1 deletion

@@ -262,6 +262,8 @@ def plot_decision_function(X, y, clf, ax, title=None):
 # density.
 
 # %%
+from sklearn.cluster import MiniBatchKMeans
+
 from imblearn.over_sampling import SVMSMOTE, BorderlineSMOTE, KMeansSMOTE
 
 X, y = create_dataset(n_samples=5000, weights=(0.01, 0.05, 0.94), class_sep=0.8)

@@ -272,7 +274,9 @@ def plot_decision_function(X, y, clf, ax, title=None):
     SMOTE(random_state=0),
     BorderlineSMOTE(random_state=0, kind="borderline-1"),
     BorderlineSMOTE(random_state=0, kind="borderline-2"),
-    KMeansSMOTE(random_state=0),
+    KMeansSMOTE(
+        kmeans_estimator=MiniBatchKMeans(n_init=1, random_state=0), random_state=0
+    ),
     SVMSMOTE(random_state=0),
 ]
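A hedged sketch of the new `KMeansSMOTE` construction shown above (not part of the commit; reading `n_init=1` as a speed/compatibility choice is an assumption):

    from sklearn.cluster import MiniBatchKMeans

    from imblearn.over_sampling import KMeansSMOTE

    # Supplying an explicit clustering estimator pins down the clustering
    # behavior instead of relying on the library default; n_init=1 runs a
    # single k-means initialization, which keeps the example fast.
    sampler = KMeansSMOTE(
        kmeans_estimator=MiniBatchKMeans(n_init=1, random_state=0), random_state=0
    )
    # X_res, y_res = sampler.fit_resample(X, y)  # X, y as in the example above

KMeansSMOTE only over-samples inside sufficiently minority-heavy clusters, so on other datasets its `cluster_balance_threshold` parameter may need tuning.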

examples/under-sampling/plot_comparison_under_sampling.py
Lines changed: 4 additions & 1 deletion

@@ -103,6 +103,7 @@ def plot_decision_function(X, y, clf, ax, title=None):
 
 # %%
 import matplotlib.pyplot as plt
+from sklearn.cluster import MiniBatchKMeans
 
 from imblearn import FunctionSampler
 from imblearn.pipeline import make_pipeline

@@ -112,7 +113,9 @@ def plot_decision_function(X, y, clf, ax, title=None):
 
 samplers = {
     FunctionSampler(),  # identity resampler
-    ClusterCentroids(random_state=0),
+    ClusterCentroids(
+        estimator=MiniBatchKMeans(n_init=1, random_state=0), random_state=0
+    ),
 }
 
 fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(15, 15))
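The same pattern applies on the under-sampling side: `ClusterCentroids` summarizes the majority class by cluster centroids. A runnable sketch on a toy dataset (illustrative, not part of the commit):

    from collections import Counter

    from sklearn.cluster import MiniBatchKMeans
    from sklearn.datasets import make_classification

    from imblearn.under_sampling import ClusterCentroids

    X, y = make_classification(n_samples=1000, weights=[0.9, 0.1], random_state=0)

    # Majority-class samples are replaced by MiniBatchKMeans centroids; passing
    # the estimator explicitly avoids depending on scikit-learn's n_init default.
    cc = ClusterCentroids(
        estimator=MiniBatchKMeans(n_init=1, random_state=0), random_state=0
    )
    X_res, y_res = cc.fit_resample(X, y)
    print(Counter(y_res))  # classes are balanced after resampling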
