11"""Transform a dataset into an imbalanced dataset."""
22
3+ import logging
4+
35import numpy as np
46
57from collections import Counter
68
79from sklearn .utils import check_X_y
810from sklearn .utils import check_random_state
911
12+ LOGGER = logging .getLogger (__name__ )
13+
14+
1015def make_imbalance (X , y , ratio , min_c_ = None , random_state = None ):
1116 """Turns a dataset into an imbalanced dataset at a specific ratio.
1217 A simple toy dataset to visualize clustering and classification
@@ -20,10 +25,10 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None):
2025 y : ndarray, shape (n_samples, )
2126 Corresponding label for each sample in X.
2227
23- ratio : float,
24- The desired ratio given by the number of samples in
25- the minority class over the the number of samples in
26- the majority class.
28+ ratio : float,
29+ The desired ratio given by the number of samples in
30+ the minority class over the number of samples in
31+ the majority class. Thus the ratio should be in the open interval (0., 1.)
2732
2833 min_c_ : str or int, optional (default=None)
2934 The identifier of the class to be the minority class.
@@ -42,6 +47,7 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None):
4247
4348 y_resampled : ndarray, shape (n_samples_new)
4449 The corresponding label of `X_resampled`
50+
4551 """
4652 if ratio <= 0.0 or ratio >= 1.0 :
4753 raise ValueError ('ratio value must be such that 0.0 < ratio < 1.0' )
@@ -52,12 +58,16 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None):
5258
5359 stats_c_ = Counter (y )
5460
61+ LOGGER .info ('The original target distribution in the dataset is: %s' ,
62+ stats_c_ )
63+
5564 if min_c_ is None :
5665 min_c_ = min (stats_c_ , key = stats_c_ .get )
5766
5867 n_min_samples = int (np .count_nonzero (y != min_c_ ) * ratio )
5968 if n_min_samples > stats_c_ [min_c_ ]:
60- raise ValueError ('Current imbalance ratio of data is lower than desired ratio!' )
69+ raise ValueError ('Current imbalance ratio of data is lower than'
70+ ' desired ratio!' )
6171 if n_min_samples == 0 :
6272 raise ValueError ('Not enough samples for desired ratio!' )
6373
@@ -68,7 +78,9 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None):
6878 idx_min = random_state .choice (idx_min , size = n_min_samples , replace = False )
6979 idx = np .concatenate ((idx_min , idx_maj ), axis = 0 )
7080
71- X_resampled , y_resampled = X [idx ,:], y [idx ]
81+ X_resampled , y_resampled = X [idx , :], y [idx ]
82+
83+ LOGGER .info ('Make the dataset imbalanced: %s' , Counter (y_resampled ))
7284
7385 return X_resampled , y_resampled
7486
0 commit comments