Skip to content

Commit 19969f6

Browse files
authored
[MRG] Make imbalance (#119)
* PEP8 and doc for make_imbalance * Add logger for the module
1 parent 4266580 commit 19969f6

File tree

2 files changed

+36
-6
lines changed

2 files changed

+36
-6
lines changed

doc/api.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,3 +118,21 @@ Functions
118118

119119
pipeline.make_pipeline
120120

121+
122+
.. _datasets_ref:
123+
124+
Datasets
125+
========
126+
127+
.. automodule:: imblearn.datasets
128+
:no-members:
129+
:no-inherited-members:
130+
131+
.. currentmodule:: imblearn
132+
133+
Functions
134+
---------
135+
.. autosummary::
136+
:toctree: generated/
137+
138+
datasets.make_imbalance

imblearn/datasets/imbalance.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
11
"""Transform a dataset into an imbalanced dataset."""
22

3+
import logging
4+
35
import numpy as np
46

57
from collections import Counter
68

79
from sklearn.utils import check_X_y
810
from sklearn.utils import check_random_state
911

12+
LOGGER = logging.getLogger(__name__)
13+
14+
1015
def make_imbalance(X, y, ratio, min_c_=None, random_state=None):
1116
"""Turns a dataset into an imbalanced dataset at specific ratio.
1217
A simple toy dataset to visualize clustering and classification
@@ -20,10 +25,10 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None):
2025
y : ndarray, shape (n_samples, )
2126
Corresponding label for each sample in X.
2227
23-
ratio : float,
24-
The desired ratio given by the number of samples in
25-
the minority class over the the number of samples in
26-
the majority class.
28+
ratio : float,
29+
The desired ratio given by the number of samples in
30+
the minority class over the number of samples in
31+
the majority class. Thus the ratio should be in the interval (0., 1.)
2732
2833
min_c_ : str or int, optional (default=None)
2934
The identifier of the class to be the minority class.
@@ -42,6 +47,7 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None):
4247
4348
y_resampled : ndarray, shape (n_samples_new)
4449
The corresponding label of `X_resampled`
50+
4551
"""
4652
if ratio <= 0.0 or ratio >= 1.0:
4753
raise ValueError('ratio value must be such that 0.0 < ratio < 1.0')
@@ -52,12 +58,16 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None):
5258

5359
stats_c_ = Counter(y)
5460

61+
LOGGER.info('The original target distribution in the dataset is: %s',
62+
stats_c_)
63+
5564
if min_c_ is None:
5665
min_c_ = min(stats_c_, key=stats_c_.get)
5766

5867
n_min_samples = int(np.count_nonzero(y != min_c_) * ratio)
5968
if n_min_samples > stats_c_[min_c_]:
60-
raise ValueError('Current imbalance ratio of data is lower than desired ratio!')
69+
raise ValueError('Current imbalance ratio of data is lower than'
70+
' desired ratio!')
6171
if n_min_samples == 0:
6272
raise ValueError('Not enough samples for desired ratio!')
6373

@@ -68,7 +78,9 @@ def make_imbalance(X, y, ratio, min_c_=None, random_state=None):
6878
idx_min = random_state.choice(idx_min, size=n_min_samples, replace=False)
6979
idx = np.concatenate((idx_min, idx_maj), axis=0)
7080

71-
X_resampled, y_resampled = X[idx,:], y[idx]
81+
X_resampled, y_resampled = X[idx, :], y[idx]
82+
83+
LOGGER.info('Make the dataset imbalanced: %s', Counter(y_resampled))
7284

7385
return X_resampled, y_resampled
7486

0 commit comments

Comments
 (0)