diff --git a/CHANGES.txt b/CHANGES.txt
index d8595c9788b6c9332264c5c7f77aebee12540201..145e2710994aec36246b8baef35993e2c9d9dd5d 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -98,6 +98,7 @@ v<0.7.8>, <01/30/2019> -- Bug fix for kNN (#158).
 v<0.7.8>, <03/14/2020> -- Add VAE (implemented by Dr Andrij Vasylenko).
 v<0.7.8>, <03/17/2020> -- Add LODA (adapted from tilitools).
 v<0.7.8.1>, <04/07/2020> -- Hot fix for SOD.
+v<0.7.8.2>, <04/14/2020> -- Bug Fix for LODA.
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 484c8e578e417deecb92d824c197865740224d74..716b250bffbcc66e018390ad66e2d776813dd8ae 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -11,7 +11,5 @@ scikit_learn>=0.19.1
 six
 sphinxcontrib-bibtex
 suod
-tensorflow==1.15.2
-# tensorflow comment out for large memory consumption
-# https://github.com/readthedocs/readthedocs.org/issues/6537
+tensorflow
 xgboost
diff --git a/pyod/models/loda.py b/pyod/models/loda.py
index 6b626c6b642ae7a718d97e01ccc83d11fd7fda3d..bc2c9c92ccbfc2440aca76604618519373a91ed8 100644
--- a/pyod/models/loda.py
+++ b/pyod/models/loda.py
@@ -101,8 +101,8 @@ class LODA(BaseDetector):
             pred_scores[:, 0] += -self.weights[i] * np.log(
                 self.histograms_[i, inds])

-        self.decision_scores_ = pred_scores / self.n_random_cuts
-        self._process_decision_scores()
+        self.decision_scores_ = (pred_scores / self.n_random_cuts).ravel()
+        self._process_decision_scores()

         return self
diff --git a/pyod/models/sod.py b/pyod/models/sod.py
index f87141ef5f62139cea792ac10a7027d90f4c2487..790db5f7909e370ab4ab9fd0569746d419b73c10 100644
--- a/pyod/models/sod.py
+++ b/pyod/models/sod.py
@@ -28,13 +28,13 @@ def _snn_imp(ind, ref_set_):
     """
     n = ind.shape[0]
-    _count = np.zeros(shape=(n, ref_set_), dtype=nb.uint32)
+    _count = np.zeros(shape=(n, ref_set_), dtype=np.uint32)
     for i in nb.prange(n):
-        temp = np.empty(n, dtype=nb.int16)
+        temp = np.empty(n, dtype=np.uint32)
         test_element_set = set(ind[i])
         for j in nb.prange(n):
             temp[j] = len(set(ind[j]).intersection(test_element_set))
-        temp[i] = np.iinfo(np.uint16).max
+        temp[i] = np.iinfo(np.uint32).max
         _count[i] = np.argsort(temp)[::-1][1:ref_set_ + 1]
     return _count
diff --git a/pyod/models/xgbod.py b/pyod/models/xgbod.py
index dc27f1cb23e6e95c02d23f767fbec70398689d99..b4498e77d3d8db7f3a731c2ba192ec5ca832f246 100644
--- a/pyod/models/xgbod.py
+++ b/pyod/models/xgbod.py
@@ -20,6 +20,7 @@ from .lof import LOF
 from .iforest import IForest
 from .hbos import HBOS
 from .ocsvm import OCSVM
+from .loda import LODA
 from ..utils.utility import check_parameter
 from ..utils.utility import check_detector
@@ -29,8 +30,8 @@ from ..utils.utility import precision_n_scores
 class XGBOD(BaseDetector):
     r"""XGBOD class for outlier detection.

-    It first use the passed in unsupervised outlier detectors to extract
-    richer representation of the data and then concatenate the newly
+    It first uses the passed in unsupervised outlier detectors to extract
+    richer representation of the data and then concatenates the newly
     generated features to the original feature for constructing the
     augmented feature space. An XGBoost classifier is then applied on
     this augmented feature space. Read more in the :cite:`zhao2018xgbod`.
@@ -41,7 +42,7 @@ class XGBOD(BaseDetector):
         The list of pyod detectors passed in for unsupervised learning

     standardization_flag_list : list, optional (default=None)
-        The list of boolean flags for indicating whether to take
+        The list of boolean flags for indicating whether to perform
         standardization for each detector.
     max_depth : int
@@ -197,20 +198,20 @@ class XGBOD(BaseDetector):
         standardization_flag_list = []

         # predefined range of n_neighbors for KNN, AvgKNN, and LOF
-        k_range = [1, 3, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
+        k_range = [1, 3, 5, 10, 20, 30, 40, 50]
         # validate the value of k
         k_range = [k for k in k_range if k < X.shape[0]]

         for k in k_range:
             estimator_list.append(KNN(n_neighbors=k, method='largest'))
-            estimator_list.append(KNN(n_neighbors=k, method='mean'))
+            # estimator_list.append(KNN(n_neighbors=k, method='mean'))
             estimator_list.append(LOF(n_neighbors=k))
-            standardization_flag_list.append(True)
+            # standardization_flag_list.append(True)
             standardization_flag_list.append(True)
             standardization_flag_list.append(True)

-        n_bins_range = [3, 5, 7, 9, 12, 15, 20, 25, 30, 50]
+        n_bins_range = [5, 10, 15, 20, 25, 30, 50]
         for n_bins in n_bins_range:
             estimator_list.append(HBOS(n_bins=n_bins))
             standardization_flag_list.append(False)
@@ -222,12 +223,18 @@ class XGBOD(BaseDetector):
             standardization_flag_list.append(True)

         # predefined range for number of estimators in isolation forests
-        n_range = [10, 20, 50, 70, 100, 150, 200, 250]
+        n_range = [10, 20, 50, 70, 100, 150, 200]
         for n in n_range:
             estimator_list.append(
                 IForest(n_estimators=n, random_state=self.random_state))
             standardization_flag_list.append(False)

+        # # predefined range for number of estimators in LODA
+        # n_bins_range = [3, 5, 10, 15, 20, 25, 30, 50]
+        # for n_bins in n_bins_range:
+        #     estimator_list.append(LODA(n_bins=n_bins))
+        #     standardization_flag_list.append(False)
+
         return estimator_list, standardization_flag_list

     def _validate_estimator(self, X):
diff --git a/pyod/version.py b/pyod/version.py
index 450f5d6eba6637dc1595a5a1f5d9c04918f00ec0..707dcfddff730bc98b9518bb0a43107a1a62a119 100644
--- a/pyod/version.py
+++ b/pyod/version.py
@@ -20,4 +20,4 @@
 # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
 # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
 #
-__version__ = '0.7.8.1'  # pragma: no cover
+__version__ = '0.7.8.2'  # pragma: no cover
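Note on the loda.py hunk: the fix only adds a .ravel() call. pred_scores is accumulated as a column vector of shape (n_samples, 1) (hence the pred_scores[:, 0] indexing), so the old assignment left decision_scores_ as a 2-D array, while pyod documents decision_scores_ as a flat array of shape (n_samples,). A minimal NumPy sketch of the shape difference (toy values, not the library code):

import numpy as np

# Toy numbers only; illustrates the shape problem the .ravel() call fixes.
n_samples, n_random_cuts = 5, 100

pred_scores = np.zeros([n_samples, 1])          # column vector, as built in LODA.fit()
pred_scores[:, 0] += np.random.rand(n_samples)  # scores accumulated over random cuts

before = pred_scores / n_random_cuts            # shape (5, 1): still 2-D
after = (pred_scores / n_random_cuts).ravel()   # shape (5,): one score per sample

print(before.shape, after.shape)                # (5, 1) (5,)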
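Note on the sod.py hunk: the buffers in _snn_imp now use np.uint32 (NumPy dtypes rather than the Numba scalar types nb.uint32 / nb.int16), and the sentinel written at temp[i] becomes np.iinfo(np.uint32).max, matching the buffer dtype. The old combination stored np.iinfo(np.uint16).max (65535) in an int16 buffer, which cannot represent that value. A small check of the integer ranges involved (plain NumPy, independent of the pyod code):

import numpy as np

# The sentinel is meant to be the largest value in `temp`, so the point itself
# lands first after the descending argsort and is skipped via the [1:] slice.
print(np.iinfo(np.uint16).max)   # 65535       -- old sentinel value
print(np.iinfo(np.int16).max)    # 32767       -- max of the old int16 buffer: too small
print(np.iinfo(np.uint32).max)   # 4294967295  -- max of the new uint32 buffer

# With matching dtypes the sentinel round-trips exactly:
temp = np.zeros(4, dtype=np.uint32)
temp[0] = np.iinfo(np.uint32).max
print(temp)                      # [4294967295 0 0 0]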