22from __future__ import print_function
33from __future__ import division
44
5+ import warnings
6+
57import numpy as np
68
79from collections import Counter
@@ -148,6 +150,17 @@ def _selection_dist_based(self, X, y, dist_vec, num_samples, key,
148150 # Sum, per sample, the distances to the farthest `size_ngh` neighbours
149151 dist_avg_vec = np .sum (dist_vec [:, - self .size_ngh :], axis = 1 )
150152
153+ self .logger .debug ('The size of the distance matrix is %s' ,
154+ dist_vec .shape )
155+ self .logger .debug ('The size of the samples that can be selected is %s' ,
156+ X [y == key ].shape )
157+
158+ if dist_vec .shape [0 ] != X [y == key ].shape [0 ]:
159+ raise RuntimeError ('The samples to be selected do not correspond'
160+ ' to the distance matrix given. Ensure that'
161+ ' both `X[y == key]` and `dist_vec` are'
162+ ' related.' )
163+
151164 # Sort the list of distances and get the corresponding indices
152165 if sel_strategy == 'nearest' :
153166 sort_way = False
@@ -160,6 +173,12 @@ def _selection_dist_based(self, X, y, dist_vec, num_samples, key,
160173 key = dist_avg_vec .__getitem__ ,
161174 reverse = sort_way )
162175
176+ # Warn the user that there were not enough samples to select from,
177+ # so every available sample is returned
178+ warnings .warn ('The number of the samples to be selected is larger than'
179+ ' the number of samples available. The balancing ratio'
180+ ' cannot be ensure and all samples will be returned.' )
181+
163182 # Select the desired number of samples
164183 sel_idx = sorted_idx [:num_samples ]
165184
@@ -291,8 +310,8 @@ def _sample(self, X, y):
291310 n_neighbors = self .size_ngh )
292311
293312 sel_x , sel_y , idx_tmp = self ._selection_dist_based (
294- X ,
295- y ,
313+ sub_samples_x ,
314+ sub_samples_y ,
296315 dist_vec ,
297316 num_samples ,
298317 key ,
0 commit comments