2626 n_jobs = _n_jobs_docstring ,
2727)
2828class SPIDER (BasePreprocessSampler ):
29- """Perform filtering and over-sampling using Selective Pre-processing of
30- Imbalanced Data (SPIDER) sampling approach for imbalanced datasets.
29+ """Perform filtering and over-sampling using SPIDER algorithm.
30+
31+ This object is an implementation of SPIDER - Selective Pre-processing of
32+ Imbalanced Data as presented in [1]_ and [2]_.
3133
3234 Read more in the :ref:`User Guide <combine>`.
3335
3436 Parameters
3537 ----------
3638 {sampling_strategy}
3739
38- kind : str (default='weak')
39- Possible choices are:
40-
41- ``'weak'``: Amplify noisy minority class samples based on the
42- number of safe majority neighbors.
40+ kind_sel : {{"weak", "relabel", "strong"}}, default='weak'
41+ Strategy to use in order to preprocess samples in the SPIDER sampling.
4342
44- ``'relabel'``: Perform ``'weak'`` amplification and then relabel
45- noisy majority neighbors for each noisy minority class sample.
46-
47- ``'strong'``: Amplify all minority class samples by an extra
48- ``additional_neighbors`` if the sample is classified incorrectly
49- by its neighbors. Otherwise each minority sample is amplified in a
50- manner akin to ``'weak'`` amplification.
43+ - If ``'weak'``, amplify noisy minority class samples based on the
44+ number of safe majority neighbors.
45+ - If ``'relabel'``, perform ``'weak'`` amplification and then relabel
46+ noisy majority neighbors for each noisy minority class sample.
47+ - If ``'strong'``, amplify all minority class samples by an extra
48+ ``additional_neighbors`` if the sample is classified incorrectly
49+ by its neighbors. Otherwise each minority sample is amplified in a
50+ manner akin to ``'weak'`` amplification.
5151
5252 n_neighbors : int or object, optional (default=3)
5353 If ``int``, number of nearest neighbours used to construct synthetic
5454 samples. If object, an estimator that inherits from
5555 :class:`sklearn.neighbors.base.KNeighborsMixin` that will be used to
56- find the k_neighbors .
56+ find the nearest-neighbors .
5757
5858 additional_neighbors : int, optional (default=2)
59- The number to add to amplified samples during if ``kind `` is
59+ The number to add to amplified samples if ``kind_sel`` is
6060 ``'strong'``. This has no effect otherwise.
6161
6262 {n_jobs}
6363
64+ See Also
65+ --------
66+ NeighborhoodClearingRule : Undersample by editing noisy samples.
67+
68+ RandomOverSampler : Random oversample the dataset.
69+
6470 Notes
6571 -----
6672 The implementation is based on [1]_ and [2]_.
6773
6874 Supports multi-class resampling. A one-vs.-rest scheme is used.
6975
70- See also
71- --------
72- NeighborhoodClearingRule : Undersample by editing noisy samples.
73-
74- RandomOverSampler : Random oversample the dataset.
75-
7676 References
7777 ----------
7878 .. [1] Stefanowski, J., & Wilk, S, "Selective pre-processing of imbalanced
@@ -107,13 +107,13 @@ class SPIDER(BasePreprocessSampler):
107107 def __init__ (
108108 self ,
109109 sampling_strategy = "auto" ,
110- kind = "weak" ,
110+ kind_sel = "weak" ,
111111 n_neighbors = 3 ,
112112 additional_neighbors = 2 ,
113113 n_jobs = None ,
114114 ):
115115 super ().__init__ (sampling_strategy = sampling_strategy )
116- self .kind = kind
116+ self .kind_sel = kind_sel
117117 self .n_neighbors = n_neighbors
118118 self .additional_neighbors = additional_neighbors
119119 self .n_jobs = n_jobs
@@ -124,10 +124,10 @@ def _validate_estimator(self):
124124 "n_neighbors" , self .n_neighbors , additional_neighbor = 1 )
125125 self .nn_ .set_params (** {"n_jobs" : self .n_jobs })
126126
127- if self .kind not in SEL_KIND :
127+ if self .kind_sel not in SEL_KIND :
128128 raise ValueError (
129129 'The possible "kind" of algorithm are "weak", "relabel",'
130- ' and "strong". Got {} instead.' .format (self .kind )
130+ ' and "strong". Got {} instead.' .format (self .kind_sel )
131131 )
132132
133133 if self .additional_neighbors < 1 :
@@ -258,17 +258,17 @@ def _fit_resample(self, X, y):
258258 X_class_noisy = _safe_indexing (X , class_noisy_indices )
259259 y_class_noisy = y [class_noisy_indices ]
260260
261- if self .kind in ("weak" , "relabel" ):
261+ if self .kind_sel in ("weak" , "relabel" ):
262262 nn_indices = self ._amplify (X_class_noisy , y_class_noisy )
263263
264- if self .kind == "relabel" :
264+ if self .kind_sel == "relabel" :
265265 relabel_mask = np .isin (nn_indices , discard_indices )
266266 relabel_indices = np .unique (nn_indices [relabel_mask ])
267267 self ._y [relabel_indices ] = class_sample
268268 discard_indices = np .setdiff1d (
269269 discard_indices , relabel_indices )
270270
271- elif self .kind == "strong" :
271+ elif self .kind_sel == "strong" :
272272 class_safe_indices = np .flatnonzero (is_class & is_safe )
273273 X_class_safe = _safe_indexing (X , class_safe_indices )
274274 y_class_safe = y [class_safe_indices ]
@@ -287,7 +287,7 @@ def _fit_resample(self, X, y):
287287 y_incorrect = y_class_noisy [~ is_correct ]
288288 self ._amplify (X_incorrect , y_incorrect , additional = True )
289289 else :
290- raise NotImplementedError (self .kind )
290+ raise NotImplementedError (self .kind_sel )
291291
292292 discard_mask = np .ones_like (y , dtype = bool )
293293 try :
0 commit comments