Skip to content

Commit 23f1ffc

Browse files
glemaitre authored and chkoar committed
Fix issue #124
1 parent 19969f6 commit 23f1ffc

File tree

4 files changed

+21
-2
lines changed

4 files changed

+21
-2
lines changed

imblearn/under_sampling/nearmiss.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
from __future__ import print_function
33
from __future__ import division
44

5+
import warnings
6+
57
import numpy as np
68

79
from collections import Counter
@@ -148,6 +150,17 @@ def _selection_dist_based(self, X, y, dist_vec, num_samples, key,
148150
# Compute the distance considering the farthest neighbour
149151
dist_avg_vec = np.sum(dist_vec[:, -self.size_ngh:], axis=1)
150152

153+
self.logger.debug('The size of the distance matrix is %s',
154+
dist_vec.shape)
155+
self.logger.debug('The size of the samples that can be selected is %s',
156+
X[y == key].shape)
157+
158+
if dist_vec.shape[0] != X[y == key].shape[0]:
159+
raise RuntimeError('The samples to be selected do not correspond'
160+
' to the distance matrix given. Ensure that'
161+
' both `X[y == key]` and `dist_vec` are'
162+
' related.')
163+
151164
# Sort the list of distance and get the index
152165
if sel_strategy == 'nearest':
153166
sort_way = False
@@ -160,6 +173,12 @@ def _selection_dist_based(self, X, y, dist_vec, num_samples, key,
160173
key=dist_avg_vec.__getitem__,
161174
reverse=sort_way)
162175

176+
# Throw a warning to tell the user that we did not have enough samples
177+
# to select and that we just select everything
178+
warnings.warn('The number of the samples to be selected is larger than'
179+
' the number of samples available. The balancing ratio'
180+
' cannot be ensure and all samples will be returned.')
181+
163182
# Select the desired number of samples
164183
sel_idx = sorted_idx[:num_samples]
165184

@@ -291,8 +310,8 @@ def _sample(self, X, y):
291310
n_neighbors=self.size_ngh)
292311

293312
sel_x, sel_y, idx_tmp = self._selection_dist_based(
294-
X,
295-
y,
313+
sub_samples_x,
314+
sub_samples_y,
296315
dist_vec,
297316
num_samples,
298317
key,
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)