Added callbacks and validation_data support to DeepSVDD #380

Open · wants to merge 1 commit into base: development
33 changes: 28 additions & 5 deletions pyod/models/deep_svdd.py
@@ -70,6 +70,9 @@ class DeepSVDD(BaseDetector):
 
     batch_size : int, optional (default=32)
         Number of samples per gradient update.
 
+    callbacks : list, optional (default=None)
+        Callbacks to use during training.
+
     dropout_rate : float in (0., 1), optional (default=0.2)
         The dropout to be used across all layers.
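As a usage sketch (not part of the diff): the new `callbacks` parameter is forwarded to `model_.fit()`, so any standard Keras callback should work. `EarlyStopping` below is stock `tf.keras`; the data is synthetic.

```python
import numpy as np
from tensorflow.keras.callbacks import EarlyStopping
from pyod.models.deep_svdd import DeepSVDD

rng = np.random.RandomState(42)
X_train = rng.randn(500, 10)  # synthetic data, for illustration only

# Stop training once the validation loss stops improving; the default
# validation_size=0.1 supplies the val_loss metric being monitored.
early_stop = EarlyStopping(monitor='val_loss', patience=5,
                           restore_best_weights=True)

clf = DeepSVDD(callbacks=[early_stop], epochs=100, verbose=0)
clf.fit(X_train)
```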
@@ -79,6 +82,11 @@ class DeepSVDD(BaseDetector):
         applied on each layer. By default, l2 regularizer is used. See
         https://keras.io/regularizers/
 
+    validation_data : tuple, optional (default=None)
+        Data on which to evaluate the loss and any model metrics at the end
+        of each epoch. Should be a tuple (x_val, y_val) of NumPy arrays or
+        tensors.
+
     validation_size : float in (0., 1), optional (default=0.1)
         The percentage of data to be used for validation.
 
@@ -138,9 +146,9 @@ def __init__(self, c=None,
                  hidden_activation='relu',
                  output_activation='sigmoid',
                  optimizer='adam',
-                 epochs=100, batch_size=32, dropout_rate=0.2,
-                 l2_regularizer=0.1, validation_size=0.1, preprocessing=True,
-                 verbose=1, random_state=None, contamination=0.1):
+                 epochs=100, batch_size=32, callbacks=None, dropout_rate=0.2,
+                 l2_regularizer=0.1, validation_data=None, validation_size=0.1,
+                 preprocessing=True, verbose=1, random_state=None, contamination=0.1):
         super(DeepSVDD, self).__init__(contamination=contamination)
         self.c = c
         self.use_ae = use_ae
@@ -164,6 +172,10 @@ def __init__(self, c=None,
             self.hidden_neurons = [64, 32]
 
         self.hidden_neurons_ = self.hidden_neurons
+        self.callbacks = callbacks
+        self.validation_data = validation_data
+        if self.validation_data is not None:
+            self.validation_size = None
 
         check_parameter(dropout_rate, 0, 1, param_name='dropout_rate',
                         include_left=True)
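The two validation options are made mutually exclusive here: passing `validation_data` clears `validation_size`, which also matches Keras' own rule that `validation_data` overrides `validation_split`. A hedged sketch of both call patterns (synthetic data; mirroring the inputs as targets follows the `fit(X_norm, X_norm)` call further down):

```python
import numpy as np
from pyod.models.deep_svdd import DeepSVDD

rng = np.random.RandomState(0)
X_train, X_val = rng.randn(400, 10), rng.randn(100, 10)

# Option 1: carve 10% off the (shuffled) training data inside fit().
clf_split = DeepSVDD(validation_size=0.1)

# Option 2: supply an explicit hold-out set; validation_size is then
# cleared so no extra split is taken. The targets mirror the inputs
# because the model is trained with fit(X_norm, X_norm).
clf_holdout = DeepSVDD(validation_data=(X_val, X_val))
assert clf_holdout.validation_size is None

clf_holdout.fit(X_train)
```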
@@ -248,19 +260,29 @@ def fit(self, X, y=None):
         X = check_array(X)
         self._set_n_classes(y)
 
+        # Validate the input validation_data if available (X only)
+        if self.validation_data is not None:
+            X_val, y_val = self.validation_data
+            X_val = check_array(X_val)
+            self.validation_data = (X_val, y_val)
+
         # Verify and construct the hidden units
         self.n_samples_, self.n_features_ = X.shape[0], X.shape[1]
 
         # Standardize data for better performance
         if self.preprocessing:
             self.scaler_ = StandardScaler()
             X_norm = self.scaler_.fit_transform(X)
+            if self.validation_data is not None:
+                X_val_norm = self.scaler_.transform(X_val)
+                self.validation_data = (X_val_norm, y_val)
         else:
             X_norm = np.copy(X)
 
         # Shuffle the data for validation, as Keras does not shuffle
         # when using validation_split
-        np.random.shuffle(X_norm)
+        if self.validation_size is not None:
+            np.random.shuffle(X_norm)
 
         # Validate and complete the number of hidden neurons
         if np.min(self.hidden_neurons) > self.n_features_ and self.use_ae:
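The guarded shuffle exists because Keras' `validation_split` slices the validation set off the *end* of the arrays without shuffling; once an explicit `validation_data` tuple is given (`validation_size` is None) there is no split to protect, so the shuffle is skipped. A pure-NumPy sketch of the behaviour being worked around:

```python
import numpy as np

def keras_style_tail_split(X, validation_split):
    # Keras takes the last fraction of samples as the validation set,
    # so ordered or grouped data yields a biased split.
    n_val = int(len(X) * validation_split)
    return X[:-n_val], X[-n_val:]

X = np.arange(100, dtype=float).reshape(100, 1)  # deliberately sorted
_, X_val = keras_style_tail_split(X, 0.1)
print(X_val.ravel())        # always the last 10 samples: 90.0 .. 99.0

np.random.shuffle(X)        # shuffling first removes the ordering bias
_, X_val = keras_style_tail_split(X, 0.1)
```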
@@ -276,8 +298,9 @@ def fit(self, X, y=None):
         self.history_ = self.model_.fit(X_norm, X_norm,
                                         epochs=self.epochs,
                                         batch_size=self.batch_size,
-                                        shuffle=True,
+                                        shuffle=True, callbacks=self.callbacks,
                                         validation_split=self.validation_size,
+                                        validation_data=self.validation_data,
                                         verbose=self.verbose).history
         # Predict on X itself and calculate the reconstruction error as
         # the outlier scores. Note that X_norm was shuffled and has to be recreated
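Either option surfaces validation metrics in the captured history; a short sketch, assuming the standard Keras `val_loss` key (`history_` is the attribute assigned above):

```python
import numpy as np
from pyod.models.deep_svdd import DeepSVDD

rng = np.random.RandomState(1)
X_train, X_val = rng.randn(300, 8), rng.randn(60, 8)

clf = DeepSVDD(validation_data=(X_val, X_val), epochs=20, verbose=0)
clf.fit(X_train)

# history_ holds the History.history dict returned by model_.fit().
print(clf.history_['loss'][-1])                   # final training loss
print(clf.history_.get('val_loss', ['n/a'])[-1])  # final validation loss
```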