diff --git a/deel/lip/__init__.py b/deel/lip/__init__.py index 070f9f4c..fba042ac 100644 --- a/deel/lip/__init__.py +++ b/deel/lip/__init__.py @@ -1,8 +1,32 @@ +# -*- coding: utf-8 -*- # Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All # rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry, # CRIAQ and ANITI - https://www.deel.ai/ -# ===================================================================================== +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +DEEL-LIP +-------- +DEEL-LIP provides a simple interface to build and train Lipschitz-constrained neural +networks based on TensorFlow/Keras framework. 
+""" from os import path with open(path.join(path.dirname(__file__), "VERSION")) as f: diff --git a/deel/lip/callbacks.py b/deel/lip/callbacks.py index 144c6573..b783c446 100644 --- a/deel/lip/callbacks.py +++ b/deel/lip/callbacks.py @@ -120,7 +120,7 @@ def _monitor(self, step): ).numpy() sig = sigmas[0] else: - RuntimeWarning( + raise RuntimeWarning( f"[MonitorCallback] layer {layer_name} has no " f"attribute {self.target}" ) @@ -137,11 +137,13 @@ def _monitor(self, step): sigmas, step=step, buckets=None, - description="distribution of singular values for layer %s" - % layer_name, + description=( + f"distribution of singular values for layer " + f"{layer_name}" + ), ) if not result: - RuntimeWarning( + raise RuntimeWarning( "[MonitorCallback] unable to find filewriter, no logs were written," ) diff --git a/deel/lip/constraints.py b/deel/lip/constraints.py index e6451a77..a51df54c 100644 --- a/deel/lip/constraints.py +++ b/deel/lip/constraints.py @@ -108,7 +108,7 @@ def __init__( super(SpectralConstraint, self).__init__() def __call__(self, w): - wbar, u, sigma = reshaped_kernel_orthogonalization( + wbar, _, _ = reshaped_kernel_orthogonalization( w, self.u, self.k_coef_lip, diff --git a/deel/lip/initializers.py b/deel/lip/initializers.py index 99761034..5dd0db36 100644 --- a/deel/lip/initializers.py +++ b/deel/lip/initializers.py @@ -2,6 +2,11 @@ # rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry, # CRIAQ and ANITI - https://www.deel.ai/ # ===================================================================================== +""" +This module contains extra Keras initializers, e.g. SpectralInitializer for 1-Lipschitz +matrix initialization. +They can be used as kernel initializers in any Keras layer. 
+""" from tensorflow.keras.initializers import Initializer from tensorflow.keras import initializers from .normalizers import ( @@ -44,7 +49,7 @@ def __init__( def __call__(self, shape, dtype=None, partition_info=None): w = self.base_initializer(shape=shape, dtype=dtype) - wbar, u, sigma = reshaped_kernel_orthogonalization( + wbar, _, _ = reshaped_kernel_orthogonalization( w, None, self.k_coef_lip, diff --git a/deel/lip/layers/__init__.py b/deel/lip/layers/__init__.py index 35344b5b..1b0e5878 100644 --- a/deel/lip/layers/__init__.py +++ b/deel/lip/layers/__init__.py @@ -1,3 +1,30 @@ +# -*- coding: utf-8 -*- +# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All +# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry, +# CRIAQ and ANITI - https://www.deel.ai/ +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+""" +The submodule `deel.lip.layers` contains all custom Keras layers to build +Lipschitz-constrained neural networks. They all inherit from `keras.layers.Layer` from +Keras API. +""" from . import unconstrained from .activations import FullSort, GroupSort, GroupSort2, Householder, MaxMin, PReLUlip from .base_layer import Condensable, LipschitzLayer diff --git a/deel/lip/layers/activations.py b/deel/lip/layers/activations.py index 291c1baf..8284a5e4 100644 --- a/deel/lip/layers/activations.py +++ b/deel/lip/layers/activations.py @@ -18,14 +18,13 @@ @register_keras_serializable("deel-lip", "MaxMin") class MaxMin(Layer, LipschitzLayer): - def __init__(self, data_format="channels_last", k_coef_lip=1.0, *args, **kwargs): + def __init__(self, data_format="channels_last", k_coef_lip=1.0, **kwargs): """ MaxMin activation [Relu(x),reLU(-x)] Args: data_format (str): either channels_first or channels_last k_coef_lip (float): the lipschitz coefficient to be enforced - *args: params passed to Layers **kwargs: params passed to layers (named fashion) Input shape: @@ -43,7 +42,7 @@ def __init__(self, data_format="channels_last", k_coef_lip=1.0, *args, **kwargs) """ self.set_klip_factor(k_coef_lip) - super(MaxMin, self).__init__(*args, **kwargs) + super(MaxMin, self).__init__(**kwargs) if data_format == "channels_last": self.channel_axis = -1 elif data_format == "channels_first": @@ -59,7 +58,7 @@ def build(self, input_shape): def _compute_lip_coef(self, input_shape=None): return 1.0 - def call(self, x, **kwargs): + def call(self, x): return ( K.concatenate( (K.relu(x, alpha=0), K.relu(-x, alpha=0)), axis=self.channel_axis @@ -83,9 +82,7 @@ def compute_output_shape(self, input_shape): @register_keras_serializable("deel-lip", "GroupSort") class GroupSort(Layer, LipschitzLayer): - def __init__( - self, n=None, data_format="channels_last", k_coef_lip=1.0, *args, **kwargs - ): + def __init__(self, n=None, data_format="channels_last", k_coef_lip=1.0, **kwargs): """ GroupSort 
activation @@ -94,7 +91,6 @@ def __init__( size (fullSort behavior) data_format (str): either channels_first or channels_last k_coef_lip (float): the lipschitz coefficient to be enforced - *args: params passed to Layers **kwargs: params passed to layers (named fashion) Input shape: @@ -107,14 +103,13 @@ def __init__( """ self.set_klip_factor(k_coef_lip) - super(GroupSort, self).__init__(*args, **kwargs) + super(GroupSort, self).__init__(**kwargs) if data_format == "channels_last": self.channel_axis = -1 elif data_format == "channels_first": raise RuntimeError( "channels_first not implemented for GroupSort activation" ) - self.channel_axis = 1 else: raise RuntimeError("data format not understood") self.n = n @@ -138,7 +133,7 @@ def _compute_lip_coef(self, input_shape=None): return 1.0 @tf.function - def call(self, x, **kwargs): + def call(self, x): fv = tf.reshape(x, self.flat_shape) if self.n == 2: b, c = tf.split(fv, 2, -1) diff --git a/deel/lip/layers/convolutional.py b/deel/lip/layers/convolutional.py index 6e17a70a..46d06344 100644 --- a/deel/lip/layers/convolutional.py +++ b/deel/lip/layers/convolutional.py @@ -63,7 +63,7 @@ def _compute_conv_lip_factor(kernel_size, strides, input_shape, data_format): elif data_format == "channels_first": h, w = input_shape[-2], input_shape[-1] else: - raise RuntimeError("data_format not understood: " % data_format) + raise RuntimeError(f"data_format not understood: {data_format}") if stride == 1: return np.sqrt( @@ -99,7 +99,7 @@ def __init__( beta_bjorck=DEFAULT_BETA_BJORCK, maxiter_spectral=DEFAULT_MAXITER_SPECTRAL, maxiter_bjorck=DEFAULT_MAXITER_BJORCK, - **kwargs + **kwargs, ): """ This class is a Conv2D Layer constrained such that all singular of it's kernel @@ -164,11 +164,7 @@ def __init__( This documentation reuse the body of the original keras.layers.Conv2D doc. 
""" - if not ( - (dilation_rate == (1, 1)) - or (dilation_rate == [1, 1]) - or (dilation_rate == 1) - ): + if dilation_rate not in ((1, 1), [1, 1], 1): raise RuntimeError("SpectralConv2D does not support dilation rate") if padding != "same": raise RuntimeError("SpectralConv2D only supports padding='same'") @@ -188,7 +184,7 @@ def __init__( activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint, - **kwargs + **kwargs, ) self._kwargs = kwargs self.set_klip_factor(k_coef_lip) @@ -299,7 +295,7 @@ def vanilla_export(self): use_bias=self.use_bias, kernel_initializer="glorot_uniform", bias_initializer="zeros", - **self._kwargs + **self._kwargs, ) layer.build(self.input_shape) layer.kernel.assign(self.wbar) @@ -334,7 +330,7 @@ def __init__( beta_bjorck=DEFAULT_BETA_BJORCK, maxiter_spectral=DEFAULT_MAXITER_SPECTRAL, maxiter_bjorck=DEFAULT_MAXITER_BJORCK, - **kwargs + **kwargs, ): """ This class is a Conv2DTranspose layer constrained such that all singular values @@ -419,7 +415,7 @@ def __init__( activity_regularizer, kernel_constraint, bias_constraint, - **kwargs + **kwargs, ) if self.dilation_rate != (1, 1): @@ -598,7 +594,7 @@ def vanilla_export(self): data_format=self.data_format, activation=self.activation, use_bias=self.use_bias, - **self._kwargs + **self._kwargs, ) layer.build(self.input_shape) layer.kernel.assign(self.wbar) @@ -631,15 +627,11 @@ def __init__( kernel_constraint=None, bias_constraint=None, k_coef_lip=1.0, - **kwargs + **kwargs, ): - if not ((strides == (1, 1)) or (strides == [1, 1]) or (strides == 1)): + if strides not in ((1, 1), [1, 1], 1): raise RuntimeError("FrobeniusConv2D does not support strides") - if not ( - (dilation_rate == (1, 1)) - or (dilation_rate == [1, 1]) - or (dilation_rate == 1) - ): + if dilation_rate not in ((1, 1), [1, 1], 1): raise RuntimeError("FrobeniusConv2D does not support dilation rate") if padding != "same": raise RuntimeError("FrobeniusConv2D only supports 
padding='same'") @@ -667,7 +659,7 @@ def __init__( activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint, - **kwargs + **kwargs, ) self.set_klip_factor(k_coef_lip) self.wbar = None diff --git a/deel/lip/layers/pooling.py b/deel/lip/layers/pooling.py index d1fc209a..d08cf9ab 100644 --- a/deel/lip/layers/pooling.py +++ b/deel/lip/layers/pooling.py @@ -38,7 +38,7 @@ def __init__( padding="valid", data_format=None, k_coef_lip=1.0, - **kwargs + **kwargs, ): """ Average pooling operation for spatial data, but with a lipschitz bound. @@ -89,7 +89,7 @@ def __init__( strides=pool_size, padding=padding, data_format=data_format, - **kwargs + **kwargs, ) self.set_klip_factor(k_coef_lip) self._kwargs = kwargs @@ -102,7 +102,7 @@ def build(self, input_shape): def _compute_lip_coef(self, input_shape=None): return np.sqrt(np.prod(np.asarray(self.pool_size))) - def call(self, x, training=True): + def call(self, x): return super(keraslayers.AveragePooling2D, self).call(x) * self._get_coef() def get_config(self): @@ -123,7 +123,7 @@ def __init__( data_format=None, k_coef_lip=1.0, eps_grad_sqrt=1e-6, - **kwargs + **kwargs, ): """ Average pooling operation for spatial data, with a lipschitz bound. 
This @@ -179,7 +179,7 @@ def __init__( strides=pool_size, padding=padding, data_format=data_format, - **kwargs + **kwargs, ) self.set_klip_factor(k_coef_lip) self.eps_grad_sqrt = eps_grad_sqrt @@ -206,7 +206,7 @@ def grad(dy): return sqrt_op - def call(self, x, training=True): + def call(self, x): return ( ScaledL2NormPooling2D._sqrt(self.eps_grad_sqrt)( super(ScaledL2NormPooling2D, self).call(tf.square(x)) @@ -293,7 +293,7 @@ def grad(dy): return sqrt_op - def call(self, x, training=True): + def call(self, x): return ( ScaledL2NormPooling2D._sqrt(self.eps_grad_sqrt)( tf.reduce_sum(tf.square(x), axis=self.axes) @@ -355,10 +355,10 @@ def _compute_lip_coef(self, input_shape=None): elif self.data_format == "channels_first": lip_coef = np.sqrt(input_shape[-2] * input_shape[-1]) else: - raise RuntimeError("data format not understood: %s" % self.data_format) + raise RuntimeError(f"data format not understood: {self.data_format}") return lip_coef - def call(self, x, training=True): + def call(self, x): return super(ScaledGlobalAveragePooling2D, self).call(x) * self._get_coef() def get_config(self): @@ -399,11 +399,7 @@ def __init__( self.pool_size = pool_size self.data_format = data_format - def build(self, input_shape): - return super(InvertibleDownSampling, self).build(input_shape) - - def call(self, inputs, **kwargs): - # inputs = super(InvertibleDownSampling, self).call(inputs, **kwargs) + def call(self, inputs): if self.data_format == "channels_last": return tf.concat( [ @@ -467,10 +463,7 @@ def __init__( self.pool_size = pool_size self.data_format = data_format - def build(self, input_shape): - return super(InvertibleUpSampling, self).build(input_shape) - - def call(self, inputs, **kwargs): + def call(self, inputs): if self.data_format == "channels_first": # convert to channels_first inputs = tf.transpose(inputs, [0, 2, 3, 1]) diff --git a/deel/lip/layers/unconstrained.py b/deel/lip/layers/unconstrained.py index eaaa495e..4316bd5a 100644 --- 
a/deel/lip/layers/unconstrained.py +++ b/deel/lip/layers/unconstrained.py @@ -1,3 +1,32 @@ +# -*- coding: utf-8 -*- +# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All +# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry, +# CRIAQ and ANITI - https://www.deel.ai/ +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +This module contains custom Keras unconstrained layers. + +Compared to other files in `layers` folder, the layers defined here are not +Lipschitz-constrained. They are base classes for more advanced layers. Do not use these +layers as is, since they are not Lipschitz constrained. 
+""" import tensorflow as tf from tensorflow.keras.utils import register_keras_serializable @@ -83,7 +112,7 @@ def __init__( self.padding_size = [self.kernel_size[0] // 2, self.kernel_size[1] // 2] self.pad = lambda t: _padding_circular(t, self.padding_size) - def compute_padded_shape(self, input_shape, padding_size): + def _compute_padded_shape(self, input_shape, padding_size): if isinstance(input_shape, tf.TensorShape): internal_input_shape = input_shape.as_list() else: @@ -95,7 +124,7 @@ def compute_padded_shape(self, input_shape, padding_size): return tf.TensorShape(internal_input_shape) def build(self, input_shape): - self.internal_input_shape = self.compute_padded_shape( + self.internal_input_shape = self._compute_padded_shape( input_shape, self.padding_size ) super(PadConv2D, self).build(self.internal_input_shape) diff --git a/deel/lip/model.py b/deel/lip/model.py index 7eaa0a6b..d6595a35 100644 --- a/deel/lip/model.py +++ b/deel/lip/model.py @@ -28,12 +28,12 @@ def _is_supported_1lip_layer(layer): if isinstance(layer, supported_1lip_layers): return True elif isinstance(layer, kl.MaxPool2D): - return True if layer.pool_size <= layer.strides else False + return layer.pool_size <= layer.strides elif isinstance(layer, kl.ReLU): - return True if (layer.threshold == 0 and layer.negative_slope <= 1) else False + return bool(layer.threshold == 0 and layer.negative_slope <= 1) elif isinstance(layer, kl.Activation): supported_activations = (ka.linear, ka.relu, ka.sigmoid, ka.tanh) - return True if layer.activation in supported_activations else False + return layer.activation in supported_activations return False @@ -124,6 +124,10 @@ class Model(KerasModel): """ def condense(self): + """ + The condense operation overwrites the kernel with the constrained kernel + and ensures that other variables are still consistent. 
+ """ for layer in self.layers: if isinstance(layer, Condensable): layer.condense() diff --git a/deel/lip/regularizers.py b/deel/lip/regularizers.py index a72dc079..afa97a1d 100644 --- a/deel/lip/regularizers.py +++ b/deel/lip/regularizers.py @@ -24,7 +24,10 @@ # rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry, # CRIAQ and ANITI - https://www.deel.ai/ # ===================================================================================== - +""" +This module contains custom Keras regularizers. They can be used as kernel regularizer +in any Keras layer. +""" import warnings from abc import ABC, abstractmethod @@ -99,6 +102,11 @@ def _check_if_orthconv_exists(self): ) def set_kernel_shape(self, shape): + """Set class attributes: kernel shape, padding, delta and alpha. + + Args: + shape: shape of the convolution kernel to regularize. + """ if shape is None: self.kernel_shape, self.padding, self.delta = None, None, None return @@ -122,6 +130,14 @@ def _compute_target(self, w, output_shape): raise NotImplementedError() def compute_lorth(self, w): + """Compute regularization term based on Lorth. + + Args: + w (tf.Tensor): the convolutional kernel. + + Returns: + tf.Tensor: value of the regularization term. + """ output = self._compute_conv_kk(w) target = self._compute_target(w, output.shape) return tf.reduce_sum(tf.square(output - target)) - self.delta @@ -211,6 +227,11 @@ def __init__( raise NotImplementedError("Only 2D convolutions are supported for Lorth.") def set_kernel_shape(self, shape): + """Set kernel shape. + + Args: + shape: shape of the convolution kernel to regularize. + """ self.kernel_shape = shape self.lorth.set_kernel_shape(shape) diff --git a/deel/lip/utils.py b/deel/lip/utils.py index fac82756..dc6281ef 100644 --- a/deel/lip/utils.py +++ b/deel/lip/utils.py @@ -36,7 +36,7 @@ def evaluate_lip_const_gen( float: the empirically evaluated lipschitz constant. 
""" - x, y = generator.send(None) + x, _ = generator.send(None) return evaluate_lip_const(model, x, eps, seed=seed) @@ -69,7 +69,7 @@ def evaluate_lip_const(model: Model, x, eps=1e-4, seed=None): ndx = K.sqrt(K.sum(K.square(dx), axis=range(1, len(x.shape)))) ndfx = K.sqrt(K.sum(K.square(dfx), axis=range(1, len(y_pred.shape)))) lip_cst = K.max(ndfx / ndx) - print("lip cst: %.3f" % lip_cst) + print(f"lip cst: {lip_cst:.3f}") return lip_cst