diff --git a/cca_zoo/__init__.py b/cca_zoo/__init__.py index 7cdf4710..d4662448 100644 --- a/cca_zoo/__init__.py +++ b/cca_zoo/__init__.py @@ -1,13 +1,17 @@ -from .data import * -from .model_selection import * -from .models import * -from .plotting import * +__all__ = [ + "data", + "model_selection", + "models", + "plotting",] +#if can import deepmodels add to all try: - from cca_zoo.deepmodels import * + import cca_zoo.deepmodels + __all__.append("deepmodels") except ModuleNotFoundError: pass try: - from cca_zoo.probabilisticmodels import * + import cca_zoo.probabilisticmodels + __all__.append("probabilisticmodels") except ModuleNotFoundError: pass diff --git a/cca_zoo/data/__init__.py b/cca_zoo/data/__init__.py index 5cb6560c..13fb5d99 100644 --- a/cca_zoo/data/__init__.py +++ b/cca_zoo/data/__init__.py @@ -1,13 +1,6 @@ -from . import simulated - try: - from . import deep + import cca_zoo.data.deep - __all__ = [ - "simulated", - "deep" - ] + __all__ = ["simulated", "deep"] except ModuleNotFoundError: - __all__ = [ - "simulated" - ] + __all__ = ["simulated"] diff --git a/cca_zoo/data/deep.py b/cca_zoo/data/deep.py index 1514340d..9365f788 100644 --- a/cca_zoo/data/deep.py +++ b/cca_zoo/data/deep.py @@ -1,3 +1,5 @@ +from typing import Iterable + import numpy as np from torch.utils.data import Dataset, DataLoader @@ -5,16 +7,17 @@ class NumpyDataset(Dataset): """ Class that turns numpy arrays into a torch dataset - """ - def __init__(self, views, labels=None): + def __init__(self, views, labels=None, scale=False, centre=False): """ :param views: list/tuple of numpy arrays or array likes with the same number of rows (samples) """ - self.views = [view for view in views] self.labels = labels + self.centre = centre + self.scale = scale + self.views = self._centre_scale(views) def __len__(self): return len(self.views[0]) @@ -27,6 +30,36 @@ def __getitem__(self, index): else: return {"views": views} + def _centre_scale(self, views: Iterable[np.ndarray]): + """ + Centers 
and scales the data + + Parameters + ---------- + views : list/tuple of numpy arrays or array likes with the same number of rows (samples) + + Returns + ------- + views : list of numpy arrays + + + """ + self.view_means = [] + self.view_stds = [] + transformed_views = [] + for view in views: + if self.centre: + view_mean = view.mean(axis=0) + self.view_means.append(view_mean) + view = view - self.view_means[-1] + if self.scale: + view_std = view.std(axis=0, ddof=1) + view_std[view_std == 0.0] = 1.0 + self.view_stds.append(view_std) + view = view / self.view_stds[-1] + transformed_views.append(view) + return transformed_views + def check_dataset(dataset): """ @@ -53,16 +86,16 @@ def check_dataset(dataset): def get_dataloaders( - dataset, - val_dataset=None, - batch_size=None, - val_batch_size=None, - drop_last=True, - val_drop_last=False, - shuffle_train=False, - pin_memory=True, - num_workers=0, - persistent_workers=True, + dataset, + val_dataset=None, + batch_size=None, + val_batch_size=None, + drop_last=True, + val_drop_last=False, + shuffle_train=False, + pin_memory=True, + num_workers=0, + persistent_workers=True, ): """ A utility function to allow users to quickly get hold of the dataloaders required by pytorch lightning diff --git a/cca_zoo/data/simulated.py b/cca_zoo/data/simulated.py index c984046d..2d25c8f9 100644 --- a/cca_zoo/data/simulated.py +++ b/cca_zoo/data/simulated.py @@ -6,20 +6,22 @@ from scipy.linalg import block_diag from sklearn.utils.validation import check_random_state -from ..utils import _process_parameter +from cca_zoo.utils import _process_parameter class LinearSimulatedData: - def __init__(self, - view_features: List[int], - latent_dims: int = 1, - view_sparsity: List[Union[int, float]] = None, - correlation: Union[List[float], float] = 0.99, - structure: Union[str, List[str]] = None, - sigma: Union[List[float], float] = None, - decay: float = 0.5, - positive=None, - random_state: Union[int, np.random.RandomState] = None): + def 
__init__( + self, + view_features: List[int], + latent_dims: int = 1, + view_sparsity: List[Union[int, float]] = None, + correlation: Union[List[float], float] = 0.99, + structure: Union[str, List[str]] = None, + sigma: Union[List[float], float] = None, + decay: float = 0.5, + positive=None, + random_state: Union[int, np.random.RandomState] = None, + ): """ Parameters @@ -57,7 +59,9 @@ def __init__(self, self.view_sparsity = _process_parameter( "view_sparsity", view_sparsity, 1, len(view_features) ) - self.positive = _process_parameter("positive", positive, False, len(view_features)) + self.positive = _process_parameter( + "positive", positive, False, len(view_features) + ) self.sigma = _process_parameter("sigma", sigma, 0.5, len(view_features)) self.mean, covs, self.true_features = self._generate_covariance_matrices() @@ -89,12 +93,12 @@ def _generate_joint_covariance(self, covs): # Cross Bit cross += covs[i] @ A @ covs[j] cov[ - splits[i]: splits[i] + self.view_features[i], - splits[j]: splits[j] + self.view_features[j], + splits[i] : splits[i] + self.view_features[i], + splits[j] : splits[j] + self.view_features[j], ] = cross cov[ - splits[j]: splits[j] + self.view_features[j], - splits[i]: splits[i] + self.view_features[i], + splits[j] : splits[j] + self.view_features[j], + splits[i] : splits[i] + self.view_features[i], ] = cross.T return cov @@ -103,7 +107,11 @@ def _generate_covariance_matrices(self): covs = [] true_features = [] for view_p, sparsity, view_structure, view_positive, view_sigma in zip( - self.view_features, self.view_sparsity, self.structure, self.positive, self.sigma + self.view_features, + self.view_sparsity, + self.structure, + self.positive, + self.sigma, ): cov = self._generate_covariance_matrix(view_p, view_structure, view_sigma) weights = self.random_state.randn(view_p, self.latent_dims) @@ -146,12 +154,12 @@ def _chol_sample(mean, chol, random_state): def simple_simulated_data( - n: int, - view_features: List[int], - view_sparsity: 
List[Union[int, float]] = None, - eps: float = 0, - transform=False, - random_state=None, + n: int, + view_features: List[int], + view_sparsity: List[Union[int, float]] = None, + eps: float = 0, + transform=False, + random_state=None, ): """ Generate a simple simulated dataset with a single latent dimension @@ -215,9 +223,9 @@ def _gaussian(x, mu, sig, dn): :param dn: """ return ( - np.exp(-np.power(x - mu, 2.0) / (2 * np.power(sig, 2.0))) - * dn - / (np.sqrt(2 * np.pi) * sig) + np.exp(-np.power(x - mu, 2.0) / (2 * np.power(sig, 2.0))) + * dn + / (np.sqrt(2 * np.pi) * sig) ) @@ -233,7 +241,7 @@ def _generate_gaussian_cov(p, sigma): def _generate_toeplitz_cov(p, sigma): c = np.arange(0, p) - c = sigma ** c + c = sigma**c cov = linalg.toeplitz(c, c) return cov diff --git a/cca_zoo/deepmodels/__init__.py b/cca_zoo/deepmodels/__init__.py index 986b0d6c..469b2d3b 100644 --- a/cca_zoo/deepmodels/__init__.py +++ b/cca_zoo/deepmodels/__init__.py @@ -1,7 +1,14 @@ from . import architectures from . import callbacks from . 
import objectives -from ._discriminative import DCCA, DCCA_NOI, BarlowTwins, DCCA_SDL, DTCCA, DCCA_EigenGame +from ._discriminative import ( + DCCA, + DCCA_NOI, + BarlowTwins, + DCCA_SDL, + DTCCA, + DCCA_EigenGame, +) from ._generative import DVCCA, SplitAE, DCCAE __all__ = [ @@ -28,5 +35,5 @@ "BarlowTwins", "DTCCA", "SplitAE", - "DCCA_EigenGame" + "DCCA_EigenGame", ] diff --git a/cca_zoo/deepmodels/_base.py b/cca_zoo/deepmodels/_base.py index 98f4b3b1..3b95ad6a 100644 --- a/cca_zoo/deepmodels/_base.py +++ b/cca_zoo/deepmodels/_base.py @@ -7,19 +7,19 @@ class _BaseDeep(pl.LightningModule): def __init__( - self, - latent_dims: int, - optimizer="adam", - scheduler=None, - lr=1e-3, - weight_decay=0, - extra_optimizer_kwargs=None, - max_epochs=1000, - min_lr=1e-9, - lr_decay_steps=None, - correlation=True, - *args, - **kwargs, + self, + latent_dims: int, + optimizer="adam", + scheduler=None, + lr=1e-3, + weight_decay=0, + extra_optimizer_kwargs=None, + max_epochs=1000, + min_lr=1e-9, + lr_decay_steps=None, + correlation=True, + *args, + **kwargs, ): super().__init__() if extra_optimizer_kwargs is None: @@ -73,8 +73,8 @@ def test_step(self, batch, batch_idx): return loss["objective"] def transform( - self, - loader: torch.utils.data.DataLoader, + self, + loader: torch.utils.data.DataLoader, ): """ :param loader: a dataloader that matches the structure of that used for training diff --git a/cca_zoo/deepmodels/_discriminative/_dcca.py b/cca_zoo/deepmodels/_discriminative/_dcca.py index 0fd364d4..250927d5 100644 --- a/cca_zoo/deepmodels/_discriminative/_dcca.py +++ b/cca_zoo/deepmodels/_discriminative/_dcca.py @@ -18,13 +18,13 @@ class DCCA(_BaseDeep, _BaseCCA): """ def __init__( - self, - latent_dims: int, - objective=objectives.MCCA, - encoders=None, - r: float = 0, - eps: float = 1e-5, - **kwargs, + self, + latent_dims: int, + objective=objectives.MCCA, + encoders=None, + r: float = 0, + eps: float = 1e-5, + **kwargs, ): super().__init__(latent_dims=latent_dims, 
**kwargs) self.encoders = torch.nn.ModuleList(encoders) @@ -41,9 +41,9 @@ def loss(self, views, **kwargs): return {"objective": self.objective.loss(z)} def pairwise_correlations( - self, - loader: torch.utils.data.DataLoader, - train=False, + self, + loader: torch.utils.data.DataLoader, + train=False, ): """ Calculates correlation for entire batch from dataloader diff --git a/cca_zoo/deepmodels/_discriminative/_dcca_barlow_twins.py b/cca_zoo/deepmodels/_discriminative/_dcca_barlow_twins.py index c9d437b1..a7edf818 100644 --- a/cca_zoo/deepmodels/_discriminative/_dcca_barlow_twins.py +++ b/cca_zoo/deepmodels/_discriminative/_dcca_barlow_twins.py @@ -14,11 +14,11 @@ class BarlowTwins(DCCA): """ def __init__( - self, - latent_dims: int, - encoders=None, - lam=1, - **kwargs, + self, + latent_dims: int, + encoders=None, + lam=1, + **kwargs, ): super().__init__(latent_dims=latent_dims, encoders=encoders, **kwargs) self.lam = lam diff --git a/cca_zoo/deepmodels/_discriminative/_dcca_eigengame.py b/cca_zoo/deepmodels/_discriminative/_dcca_eigengame.py index 93237997..2428ea51 100644 --- a/cca_zoo/deepmodels/_discriminative/_dcca_eigengame.py +++ b/cca_zoo/deepmodels/_discriminative/_dcca_eigengame.py @@ -11,18 +11,8 @@ class DCCA_EigenGame(DCCA): Chapman, James, Ana Lawry Aguila, and Lennie Wells. "A Generalized EigenGame with Extensions to Multiview Representation Learning." arXiv preprint arXiv:2211.11323 (2022).
""" - def __init__( - self, - latent_dims: int, - encoders=None, - r: float = 0, - **kwargs - ): - super().__init__( - latent_dims=latent_dims, - encoders=encoders, - **kwargs - ) + def __init__(self, latent_dims: int, encoders=None, r: float = 0, **kwargs): + super().__init__(latent_dims=latent_dims, encoders=encoders, **kwargs) self.r = r def forward(self, views, **kwargs): @@ -43,7 +33,18 @@ def loss(self, views, **kwargs): } def get_AB(self, z): - Cxy = torch.cov(torch.hstack((z[0], z[1])).T)[self.latent_dims:, :self.latent_dims] - Cxx = torch.cov(z[0].T) + torch.eye(self.latent_dims, device=z[0].device) * self.r - Cyy = torch.cov(z[1].T) + torch.eye(self.latent_dims, device=z[1].device) * self.r - return Cxy + Cxy.T, Cxx + Cyy + # sum the pairwise covariances between each z and all other zs + A = z[0].new_zeros(self.latent_dims, self.latent_dims) + B = torch.zeros_like(A) + for i, zi in enumerate(z): + for j, zj in enumerate(z): + if i == j: + B += ( + torch.cov(zi.T) + + torch.eye(self.latent_dims, device=zi.device) * self.r + ) + else: + A += torch.cov(torch.hstack((zi, zj)).T)[ + self.latent_dims :, : self.latent_dims + ] + return A, B diff --git a/cca_zoo/deepmodels/_discriminative/_dcca_noi.py b/cca_zoo/deepmodels/_discriminative/_dcca_noi.py index 39466b37..db40b812 100644 --- a/cca_zoo/deepmodels/_discriminative/_dcca_noi.py +++ b/cca_zoo/deepmodels/_discriminative/_dcca_noi.py @@ -16,15 +16,15 @@ class DCCA_NOI(DCCA): """ def __init__( - self, - latent_dims: int, - N: int, - encoders=None, - r: float = 0, - rho: float = 0.2, - eps: float = 1e-9, - shared_target: bool = False, - **kwargs, + self, + latent_dims: int, + N: int, + encoders=None, + r: float = 0, + rho: float = 0.2, + eps: float = 1e-9, + shared_target: bool = False, + **kwargs, ): super().__init__( latent_dims=latent_dims, encoders=encoders, r=r, eps=eps, **kwargs diff --git a/cca_zoo/deepmodels/_discriminative/_dcca_sdl.py
b/cca_zoo/deepmodels/_discriminative/_dcca_sdl.py index 97905f38..1cd28f22 100644 --- a/cca_zoo/deepmodels/_discriminative/_dcca_sdl.py +++ b/cca_zoo/deepmodels/_discriminative/_dcca_sdl.py @@ -15,16 +15,16 @@ class DCCA_SDL(DCCA_NOI): """ def __init__( - self, - latent_dims: int, - N: int, - encoders=None, - r: float = 0, - rho: float = 0.2, - eps: float = 1e-5, - shared_target: bool = False, - lam=0.5, - **kwargs + self, + latent_dims: int, + N: int, + encoders=None, + r: float = 0, + rho: float = 0.2, + eps: float = 1e-5, + shared_target: bool = False, + lam=0.5, + **kwargs ): super().__init__( latent_dims=latent_dims, diff --git a/cca_zoo/deepmodels/_discriminative/_dtcca.py b/cca_zoo/deepmodels/_discriminative/_dtcca.py index d9e484b2..eb03fbae 100644 --- a/cca_zoo/deepmodels/_discriminative/_dtcca.py +++ b/cca_zoo/deepmodels/_discriminative/_dtcca.py @@ -20,7 +20,7 @@ class DTCCA(DCCA): """ def __init__( - self, latent_dims: int, encoders=None, r: float = 0, eps: float = 1e-5, **kwargs + self, latent_dims: int, encoders=None, r: float = 0, eps: float = 1e-5, **kwargs ): super().__init__( latent_dims=latent_dims, diff --git a/cca_zoo/deepmodels/_generative/_base.py b/cca_zoo/deepmodels/_generative/_base.py index 2d3e7ee9..7f9aba02 100644 --- a/cca_zoo/deepmodels/_generative/_base.py +++ b/cca_zoo/deepmodels/_generative/_base.py @@ -30,8 +30,7 @@ def kl_loss(mu, logvar): def _decode(self, z, **kwargs): raise NotImplementedError - def recon(self, - loader: torch.utils.data.DataLoader, **kwargs): + def recon(self, loader: torch.utils.data.DataLoader, **kwargs): with torch.no_grad(): x = [] for batch_idx, batch in enumerate(loader): diff --git a/cca_zoo/deepmodels/_generative/_dccae.py b/cca_zoo/deepmodels/_generative/_dccae.py index 95914298..df086c2c 100644 --- a/cca_zoo/deepmodels/_generative/_dccae.py +++ b/cca_zoo/deepmodels/_generative/_dccae.py @@ -17,18 +17,18 @@ class DCCAE(DCCA, _GenerativeMixin): """ def __init__( - self, - latent_dims: int, - 
objective=objectives.MCCA, - encoders=None, - decoders=None, - r: float = 0, - eps: float = 1e-5, - lam=0.5, - latent_dropout=0, - img_dim=None, - recon_loss_type="mse", - **kwargs, + self, + latent_dims: int, + objective=objectives.MCCA, + encoders=None, + decoders=None, + r: float = 0, + eps: float = 1e-5, + lam=0.5, + latent_dropout=0, + img_dim=None, + recon_loss_type="mse", + **kwargs, ): super().__init__( latent_dims=latent_dims, @@ -82,7 +82,7 @@ def loss(self, views, **kwargs): ).sum() loss["correlation"] = self.objective.loss(z) loss["objective"] = ( - self.lam * loss["reconstruction"] + (1 - self.lam) * loss["correlation"] + self.lam * loss["reconstruction"] + (1 - self.lam) * loss["correlation"] ) return loss diff --git a/cca_zoo/deepmodels/_generative/_dvcca.py b/cca_zoo/deepmodels/_generative/_dvcca.py index e3ff02ce..27052f09 100644 --- a/cca_zoo/deepmodels/_generative/_dvcca.py +++ b/cca_zoo/deepmodels/_generative/_dvcca.py @@ -20,15 +20,15 @@ class DVCCA(_BaseDeep, _GenerativeMixin): """ def __init__( - self, - latent_dims: int, - encoders=None, - decoders=None, - private_encoders: Iterable = None, - latent_dropout=0, - img_dim=None, - recon_loss_type="mse", - **kwargs, + self, + latent_dims: int, + encoders=None, + decoders=None, + private_encoders: Iterable = None, + latent_dropout=0, + img_dim=None, + recon_loss_type="mse", + **kwargs, ): super().__init__(latent_dims=latent_dims, **kwargs) self.img_dim = img_dim @@ -125,7 +125,7 @@ def loss(self, views, **kwargs): ] ).sum() loss["kl shared"] = ( - self.kl_loss(z["mu_shared"], z["logvar_shared"]) / views[0].numel() + self.kl_loss(z["mu_shared"], z["logvar_shared"]) / views[0].numel() ) if "private" in z: loss["kl private"] = torch.stack( @@ -138,8 +138,8 @@ def loss(self, views, **kwargs): return loss def transform( - self, - loader: torch.utils.data.DataLoader, + self, + loader: torch.utils.data.DataLoader, ): """ :param loader: a dataloader that matches the structure of that used for training 
diff --git a/cca_zoo/deepmodels/_generative/_splitae.py b/cca_zoo/deepmodels/_generative/_splitae.py index b2a0144a..2e3ece8f 100644 --- a/cca_zoo/deepmodels/_generative/_splitae.py +++ b/cca_zoo/deepmodels/_generative/_splitae.py @@ -17,14 +17,14 @@ class SplitAE(_BaseDeep, _GenerativeMixin): """ def __init__( - self, - latent_dims: int, - encoder=Encoder, - decoders=None, - latent_dropout=0, - recon_loss_type="mse", - img_dim=None, - **kwargs + self, + latent_dims: int, + encoder=Encoder, + decoders=None, + latent_dropout=0, + recon_loss_type="mse", + img_dim=None, + **kwargs ): """ diff --git a/cca_zoo/deepmodels/architectures.py b/cca_zoo/deepmodels/architectures.py index 4698a79a..43f53702 100644 --- a/cca_zoo/deepmodels/architectures.py +++ b/cca_zoo/deepmodels/architectures.py @@ -31,13 +31,13 @@ def forward(self, x): class Encoder(_BaseEncoder): def __init__( - self, - latent_dims: int, - variational: bool = False, - feature_size: int = 784, - layer_sizes: tuple = None, - activation=nn.LeakyReLU(), - dropout=0, + self, + latent_dims: int, + variational: bool = False, + feature_size: int = 784, + layer_sizes: tuple = None, + activation=nn.LeakyReLU(), + dropout=0, ): super(Encoder, self).__init__(latent_dims, variational=variational) if layer_sizes is None: @@ -80,12 +80,12 @@ def forward(self, x): class Decoder(_BaseDecoder): def __init__( - self, - latent_dims: int, - feature_size: int = 784, - layer_sizes: tuple = None, - activation=nn.LeakyReLU(), - dropout=0, + self, + latent_dims: int, + feature_size: int = 784, + layer_sizes: tuple = None, + activation=nn.LeakyReLU(), + dropout=0, ): super(Decoder, self).__init__(latent_dims) if layer_sizes is None: @@ -109,16 +109,16 @@ def forward(self, x): class CNNEncoder(_BaseEncoder): def __init__( - self, - latent_dims: int, - variational: bool = False, - feature_size: Iterable = (28, 28), - channels: tuple = None, - kernel_sizes: tuple = None, - stride: tuple = None, - padding: tuple = None, - 
activation=nn.LeakyReLU(), - dropout=0, + self, + latent_dims: int, + variational: bool = False, + feature_size: Iterable = (28, 28), + channels: tuple = None, + kernel_sizes: tuple = None, + stride: tuple = None, + padding: tuple = None, + activation=nn.LeakyReLU(), + dropout=0, ): super(CNNEncoder, self).__init__(latent_dims, variational=variational) if channels is None: @@ -187,15 +187,15 @@ def forward(self, x): class CNNDecoder(_BaseDecoder): def __init__( - self, - latent_dims: int, - feature_size: Iterable = (28, 28), - channels: tuple = None, - kernel_sizes=None, - strides=None, - paddings=None, - activation=nn.LeakyReLU(), - dropout=0, + self, + latent_dims: int, + feature_size: Iterable = (28, 28), + channels: tuple = None, + kernel_sizes=None, + strides=None, + paddings=None, + activation=nn.LeakyReLU(), + dropout=0, ): super(CNNDecoder, self).__init__(latent_dims) if channels is None: @@ -210,7 +210,7 @@ def __init__( current_channels = 1 current_size = feature_size[0] for l_id, (channel, kernel, stride, padding) in reversed( - list(enumerate(zip(channels, kernel_sizes, strides, paddings))) + list(enumerate(zip(channels, kernel_sizes, strides, paddings))) ): conv_layers.append( torch.nn.Sequential( diff --git a/cca_zoo/deepmodels/callbacks.py b/cca_zoo/deepmodels/callbacks.py index f2d778de..65fe42f3 100644 --- a/cca_zoo/deepmodels/callbacks.py +++ b/cca_zoo/deepmodels/callbacks.py @@ -7,7 +7,7 @@ class CorrelationCallback(Callback): def on_validation_epoch_end( - self, trainer: Trainer, pl_module: LightningModule + self, trainer: Trainer, pl_module: LightningModule ) -> None: pl_module.log( "val/corr", @@ -17,7 +17,7 @@ def on_validation_epoch_end( class GenerativeCallback(Callback): def on_validation_epoch_end( - self, trainer: Trainer, pl_module: LightningModule + self, trainer: Trainer, pl_module: LightningModule ) -> None: if hasattr(pl_module, "img_dim") and pl_module.img_dim is not None: z = dict() diff --git a/cca_zoo/deepmodels/objectives.py 
b/cca_zoo/deepmodels/objectives.py index 898b14c8..a9a43cc3 100644 --- a/cca_zoo/deepmodels/objectives.py +++ b/cca_zoo/deepmodels/objectives.py @@ -49,7 +49,8 @@ def loss(self, views): # Get the block covariance matrix placing Xi^TX_i on the diagonal D = torch.block_diag( *[ - (1 - self.r) * m.T @ m / (n - 1) + self.r * torch.eye(m.shape[1], device=m.device) + (1 - self.r) * m.T @ m / (n - 1) + + self.r * torch.eye(m.shape[1], device=m.device) for i, m in enumerate(views) ] ) @@ -104,7 +105,8 @@ def loss(self, views): views = _demean(views) eigen_views = [ - view @ _mat_pow(view.T @ view / (n - 1), -1, self.eps) @ view.T for view in views + view @ _mat_pow(view.T @ view / (n - 1), -1, self.eps) @ view.T + for view in views ] Q = torch.stack(eigen_views, dim=0).sum(dim=0) diff --git a/cca_zoo/model_selection/_search.py b/cca_zoo/model_selection/_search.py index 944b250f..a30ad0c3 100644 --- a/cca_zoo/model_selection/_search.py +++ b/cca_zoo/model_selection/_search.py @@ -100,11 +100,11 @@ def __iter__(self): if isinstance(v, Iterable): # if each element is a distribution for each view (i.e. 
it is a non-string Iterable) then call return_param for each view if any( - [ - (isinstance(v_, Iterable) and not isinstance(v_, str)) - or hasattr(v_, "rvs") - for v_ in v - ] + [ + (isinstance(v_, Iterable) and not isinstance(v_, str)) + or hasattr(v_, "rvs") + for v_ in v + ] ): params[k] = [self.return_param(v_) for v_ in v] # if the parameter is shared across views then the list will just contain non-iterable values @@ -160,18 +160,18 @@ class GridSearchCV(BaseSearchCV): _required_parameters = ["estimator", "param_grid"] def __init__( - self, - estimator, - param_grid, - *, - scoring=None, - n_jobs=None, - refit=True, - cv=None, - verbose=0, - pre_dispatch="2*n_jobs", - error_score=np.nan, - return_train_score=False, + self, + estimator, + param_grid, + *, + scoring=None, + n_jobs=None, + refit=True, + cv=None, + verbose=0, + pre_dispatch="2*n_jobs", + error_score=np.nan, + return_train_score=False, ): super().__init__( estimator=estimator, @@ -208,7 +208,7 @@ def fit(self, X, y=None, *, groups=None, **fit_params): self = BaseSearchCV.fit(self, np.hstack(X), y=None, groups=None, **fit_params) self.best_estimator_ = self.best_estimator_["estimator"] self.best_params_ = { - key[len("estimator__"):]: val for key, val in self.best_params_.items() + key[len("estimator__") :]: val for key, val in self.best_params_.items() } return self @@ -247,20 +247,20 @@ class RandomizedSearchCV(BaseSearchCV): _required_parameters = ["estimator", "param_distributions"] def __init__( - self, - estimator, - param_distributions, - *, - n_iter=10, - scoring=None, - n_jobs=None, - refit=True, - cv=None, - verbose=0, - pre_dispatch="2*n_jobs", - random_state=None, - error_score=np.nan, - return_train_score=False, + self, + estimator, + param_distributions, + *, + n_iter=10, + scoring=None, + n_jobs=None, + refit=True, + cv=None, + verbose=0, + pre_dispatch="2*n_jobs", + random_state=None, + error_score=np.nan, + return_train_score=False, ): self.param_distributions = { 
f"estimator__{key}": val for key, val in param_distributions.items() @@ -297,6 +297,6 @@ def fit(self, X, y=None, *, groups=None, **fit_params): self = BaseSearchCV.fit(self, np.hstack(X), y=None, groups=None, **fit_params) self.best_estimator_ = self.best_estimator_["estimator"] self.best_params_ = { - key[len("estimator__"):]: val for key, val in self.best_params_.items() + key[len("estimator__") :]: val for key, val in self.best_params_.items() } return self diff --git a/cca_zoo/model_selection/_validation.py b/cca_zoo/model_selection/_validation.py index 625ec4d0..97310485 100644 --- a/cca_zoo/model_selection/_validation.py +++ b/cca_zoo/model_selection/_validation.py @@ -26,7 +26,7 @@ def scoring(estimator: Pipeline, X, y): all_corrs = [] for x, y in itertools.product(transformed_views, repeat=2): all_corrs.append( - np.diag(np.corrcoef(x.T, y.T)[: x.shape[1], x.shape[1]:]) + np.diag(np.corrcoef(x.T, y.T)[: x.shape[1], x.shape[1] :]) ) all_corrs = np.array(all_corrs).reshape( (len(transformed_views), len(transformed_views), x.shape[1]) @@ -39,20 +39,20 @@ def scoring(estimator: Pipeline, X, y): def cross_validate( - estimator, - views, - y=None, - *, - groups=None, - scoring=None, - cv=None, - n_jobs=None, - verbose=0, - fit_params=None, - pre_dispatch="2*n_jobs", - return_train_score=False, - return_estimator=False, - error_score=np.nan, + estimator, + views, + y=None, + *, + groups=None, + scoring=None, + cv=None, + n_jobs=None, + verbose=0, + fit_params=None, + pre_dispatch="2*n_jobs", + return_train_score=False, + return_estimator=False, + error_score=np.nan, ): """ Evaluate metric(s) by cross-validation and also record fit/score times. 
@@ -149,17 +149,17 @@ def cross_validate( def permutation_test_score( - estimator, - views, - y=None, - groups=None, - cv=None, - n_permutations=100, - n_jobs=None, - random_state=0, - verbose=0, - scoring=None, - fit_params=None, + estimator, + views, + y=None, + groups=None, + cv=None, + n_permutations=100, + n_jobs=None, + random_state=0, + verbose=0, + scoring=None, + fit_params=None, ): """ Evaluate the significance of a cross-validated score with permutations @@ -291,22 +291,22 @@ def _shuffle(X, groups, random_state, splitter): def learning_curve( - estimator, - views, - y=None, - groups=None, - train_sizes=np.linspace(0.1, 1.0, 5), - cv=None, - scoring=None, - exploit_incremental_learning=False, - n_jobs=None, - pre_dispatch="all", - verbose=0, - shuffle=False, - random_state=None, - error_score=np.nan, - return_times=False, - fit_params=None, + estimator, + views, + y=None, + groups=None, + train_sizes=np.linspace(0.1, 1.0, 5), + cv=None, + scoring=None, + exploit_incremental_learning=False, + n_jobs=None, + pre_dispatch="all", + verbose=0, + shuffle=False, + random_state=None, + error_score=np.nan, + return_times=False, + fit_params=None, ): """ Learning curve. 
diff --git a/cca_zoo/models/__init__.py b/cca_zoo/models/__init__.py index 30144461..37803204 100644 --- a/cca_zoo/models/__init__.py +++ b/cca_zoo/models/__init__.py @@ -36,15 +36,31 @@ "TCCA", "KTCCA", "PRCCA", - "GRCCA" + "GRCCA", ] try: - from ._stochastic import PLSStochasticPower, IncrementalPLS, PLSGHAGEP, CCAGHAGEP, RCCAGHAGEP, PLSEigenGame, \ - CCAEigenGame, RCCAEigenGame + from ._stochastic import ( + PLSStochasticPower, + IncrementalPLS, + PLSGHAGEP, + CCAGHAGEP, + RCCAGHAGEP, + PLSEigenGame, + CCAEigenGame, + RCCAEigenGame, + ) - __all__.extend("StochasticPowerPLS", "IncrementalPLS", "PLSGHAGEP", "CCAGHAGEP", "RCCAGHAGEP", "PLSEigenGame", - "CCAEigenGame", "RCCAEigenGame") + __all__ += [ + "PLSStochasticPower", + "IncrementalPLS", + "PLSGHAGEP", + "CCAGHAGEP", + "RCCAGHAGEP", + "PLSEigenGame", + "CCAEigenGame", + "RCCAEigenGame", + ] except: pass diff --git a/cca_zoo/models/_base.py b/cca_zoo/models/_base.py index 409838c3..9f3d613a 100644 --- a/cca_zoo/models/_base.py +++ b/cca_zoo/models/_base.py @@ -17,13 +17,13 @@ class _BaseCCA(BaseEstimator, MultiOutputMixin, RegressorMixin): """ def __init__( - self, - latent_dims: int = 1, - scale=True, - centre=True, - copy_data=True, - accept_sparse=False, - random_state: Union[int, np.random.RandomState] = None, + self, + latent_dims: int = 1, + scale=True, + centre=True, + copy_data=True, + accept_sparse=False, + random_state: Union[int, np.random.RandomState] = None, ): """ Parameters @@ -108,7 +108,9 @@ def fit_transform(self, views: Iterable[np.ndarray], **kwargs): """ return self.fit(views, **kwargs).transform(views, **kwargs) - def get_factor_loadings(self, views: Iterable[np.ndarray], normalize=True, **kwargs): + def get_factor_loadings( + self, views: Iterable[np.ndarray], normalize=True, **kwargs + ): """ Returns the factor loadings for each view @@ -128,7 +130,7 @@ def get_factor_loadings(self, views: Iterable[np.ndarray], normalize=True, **kwa if normalize: loadings = [ np.corrcoef(view,
transformed_view, rowvar=False)[ - : view.shape[1], view.shape[1]: + : view.shape[1], view.shape[1] : ] for view, transformed_view in zip(views, transformed_views) ] @@ -157,7 +159,7 @@ def pairwise_correlations(self, views: Iterable[np.ndarray], **kwargs): all_corrs = [] for x, y in itertools.product(transformed_views, repeat=2): all_corrs.append( - np.diag(np.corrcoef(x.T, y.T)[: self.latent_dims, self.latent_dims:]) + np.diag(np.corrcoef(x.T, y.T)[: self.latent_dims, self.latent_dims :]) ) all_corrs = np.array(all_corrs).reshape( (len(transformed_views), len(transformed_views), self.latent_dims) @@ -186,8 +188,8 @@ def score(self, views: Iterable[np.ndarray], y=None, **kwargs): n_views = pair_corrs.shape[0] # sum all the pairwise correlations for each dimension. Subtract the self correlations. Divide by the number of views. Gives average correlation dim_corrs = ( - pair_corrs.sum(axis=tuple(range(pair_corrs.ndim - 1))) - n_views - ) / (n_views ** 2 - n_views) + pair_corrs.sum(axis=tuple(range(pair_corrs.ndim - 1))) - n_views + ) / (n_views**2 - n_views) return dim_corrs def _centre_scale(self, views: Iterable[np.ndarray]): diff --git a/cca_zoo/models/_grcca.py b/cca_zoo/models/_grcca.py index fd37dcc7..2813b71b 100644 --- a/cca_zoo/models/_grcca.py +++ b/cca_zoo/models/_grcca.py @@ -37,15 +37,15 @@ class GRCCA(MCCA): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - eps=1e-3, - c: float = 0, - mu: float = 0, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + eps=1e-3, + c: float = 0, + mu: float = 0, ): super().__init__( latent_dims=latent_dims, @@ -76,7 +76,9 @@ def fit(self, views: Iterable[np.ndarray], y=None, feature_groups=None, **kwargs warnings.warn(f"No feature groups provided, using all features") feature_groups = [np.ones(view.shape[1], dtype=int) for view in views] for feature_group in feature_groups: - assert 
np.issubdtype(feature_group.dtype, np.integer), "feature groups must be integers" + assert np.issubdtype( + feature_group.dtype, np.integer + ), "feature groups must be integers" views = self._validate_inputs(views) self._check_params() views, idxs = self._preprocess(views, feature_groups) @@ -91,7 +93,9 @@ def _preprocess(self, views, feature_groups): zip( *[ self._process_view(view, group, mu, c) - for view, group, mu, c in zip(views, feature_groups, self.mu, self.c) + for view, group, mu, c in zip( + views, feature_groups, self.mu, self.c + ) ] ) ) @@ -116,7 +120,7 @@ def _transform_weights(self, views, groups): for i, (view, group) in enumerate(zip(views, groups)): if self.c[i] > 0: weights_1 = self.weights[i][: len(group)] - weights_2 = self.weights[i][len(group):] + weights_2 = self.weights[i][len(group) :] ids, unique_inverse, unique_counts, group_means = _group_mean( weights_1.T, group ) diff --git a/cca_zoo/models/_iterative/_altmaxvar.py b/cca_zoo/models/_iterative/_altmaxvar.py index 91ece291..a3fa89bd 100644 --- a/cca_zoo/models/_iterative/_altmaxvar.py +++ b/cca_zoo/models/_iterative/_altmaxvar.py @@ -10,6 +10,7 @@ class AltMaxVar(_BaseIterative): r""" + Fits an Alt Max Var Regularised CCA model to two or more views of data. .. 
math:: @@ -49,17 +50,17 @@ class AltMaxVar(_BaseIterative): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - max_iter: int = 100, - initialization: Union[str, callable] = "pls", - tol: float = 1e-9, - view_regs=None, - verbose=0, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + max_iter: int = 100, + initialization: Union[str, callable] = "pls", + tol: float = 1e-9, + view_regs=None, + verbose=0, ): super().__init__( latent_dims=latent_dims, @@ -118,14 +119,14 @@ def _initialization(self, views, initialization, random_state, latent_dims): class _AltMaxVarLoop(_BaseInnerLoop): def __init__( - self, - max_iter: int = 100, - tol=1e-9, - random_state=None, - view_regs=None, - alpha=1e-3, - verbose=0, - **kwargs, + self, + max_iter: int = 100, + tol=1e-9, + random_state=None, + view_regs=None, + alpha=1e-3, + verbose=0, + **kwargs, ): super().__init__( max_iter=max_iter, tol=tol, random_state=random_state, verbose=verbose @@ -155,7 +156,7 @@ def _objective(self, views): total_objective = 0 for i, _ in enumerate(views): objective = np.linalg.norm(views[i] @ self.weights[i] - self.G) ** 2 / ( - 2 * self.n + 2 * self.n ) total_objective += objective + self.view_regs[i].cost( views[i], self.weights[i] diff --git a/cca_zoo/models/_iterative/_base.py b/cca_zoo/models/_iterative/_base.py index 2faff724..cd98806a 100644 --- a/cca_zoo/models/_iterative/_base.py +++ b/cca_zoo/models/_iterative/_base.py @@ -19,17 +19,17 @@ class _BaseIterative(_BaseCCA): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - deflation="cca", - max_iter: int = 100, - initialization: Union[str, callable] = "random", - tol: float = 1e-9, - verbose=0, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + deflation="cca", + max_iter: int 
= 100, + initialization: Union[str, callable] = "random", + tol: float = 1e-9, + verbose=0, ): """ Constructor for _BaseIterative @@ -74,9 +74,9 @@ def _outer_loop(self, views): self.track = [] residuals = copy.deepcopy(list(views)) for k in ( - tqdm(range(self.latent_dims), desc="latent dimension") - if self.verbose > 0 - else range(self.latent_dims) + tqdm(range(self.latent_dims), desc="latent dimension") + if self.verbose > 0 + else range(self.latent_dims) ): self._set_loop_params() self.loop = self.loop._fit(residuals, initial_scores=next(initializer)) @@ -114,11 +114,11 @@ def _set_loop_params(self): class _BaseInnerLoop: def __init__( - self, - max_iter: int = 100, - tol: float = 1e-9, - random_state=None, - verbose=0, + self, + max_iter: int = 100, + tol: float = 1e-9, + random_state=None, + verbose=0, ): self.track = {"converged": False, "objective": []} self.max_iter = max_iter @@ -137,9 +137,9 @@ def _fit(self, views: np.ndarray, initial_scores): self._initialize(views) # Iterate until convergence for _ in ( - tqdm(range(self.max_iter), desc="inner loop iterations") - if self.verbose > 1 - else range(self.max_iter) + tqdm(range(self.max_iter), desc="inner loop iterations") + if self.verbose > 1 + else range(self.max_iter) ): self._inner_iteration(views) if np.isnan(self.scores).sum() > 0: @@ -157,8 +157,8 @@ def _fit(self, views: np.ndarray, initial_scores): def _early_stop(self) -> bool: # Some kind of early stopping if all( - _cosine_similarity(self.scores[n], self.old_scores[n]) > (1 - self.tol) - for n, view in enumerate(self.scores) + _cosine_similarity(self.scores[n], self.old_scores[n]) > (1 - self.tol) + for n, view in enumerate(self.scores) ): return True else: diff --git a/cca_zoo/models/_iterative/_elastic.py b/cca_zoo/models/_iterative/_elastic.py index 6d7d9c59..e5ab4476 100644 --- a/cca_zoo/models/_iterative/_elastic.py +++ b/cca_zoo/models/_iterative/_elastic.py @@ -13,7 +13,7 @@ class ElasticCCA(_BaseIterative): r""" - Fits an elastic CCA 
by iterating elastic net regressions. + Fits an elastic CCA by iterating elastic net regressions to two or more views of data. By default, ElasticCCA uses CCA with an auxiliary variable target i.e. MAXVAR configuration @@ -82,22 +82,22 @@ class ElasticCCA(_BaseIterative): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - deflation="cca", - max_iter: int = 100, - initialization: Union[str, callable] = "pls", - tol: float = 1e-9, - c: Union[Iterable[float], float] = None, - l1_ratio: Union[Iterable[float], float] = None, - maxvar: bool = True, - stochastic=False, - positive: Union[Iterable[bool], bool] = None, - verbose=0, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + deflation="cca", + max_iter: int = 100, + initialization: Union[str, callable] = "pls", + tol: float = 1e-9, + c: Union[Iterable[float], float] = None, + l1_ratio: Union[Iterable[float], float] = None, + maxvar: bool = True, + stochastic=False, + positive: Union[Iterable[bool], bool] = None, + verbose=0, ): self.c = c self.l1_ratio = l1_ratio @@ -193,21 +193,21 @@ class SCCA_IPLS(ElasticCCA): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - deflation="cca", - c: Union[Iterable[float], float] = None, - max_iter: int = 100, - maxvar: bool = False, - initialization: Union[str, callable] = "pls", - tol: float = 1e-9, - stochastic=False, - positive: Union[Iterable[bool], bool] = None, - verbose=0, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + deflation="cca", + c: Union[Iterable[float], float] = None, + max_iter: int = 100, + maxvar: bool = False, + initialization: Union[str, callable] = "pls", + tol: float = 1e-9, + stochastic=False, + positive: Union[Iterable[bool], bool] = None, + verbose=0, ): super().__init__( 
latent_dims=latent_dims, @@ -230,17 +230,17 @@ def __init__( class _ElasticInnerLoop(_PLSInnerLoop): def __init__( - self, - max_iter: int = 100, - tol=1e-9, - c=None, - l1_ratio=None, - maxvar=True, - stochastic=True, - positive=None, - random_state=None, - verbose=0, - **kwargs, + self, + max_iter: int = 100, + tol=1e-9, + c=None, + l1_ratio=None, + maxvar=True, + stochastic=True, + positive=None, + random_state=None, + verbose=0, + **kwargs, ): super().__init__( max_iter=max_iter, tol=tol, random_state=random_state, verbose=verbose @@ -301,8 +301,8 @@ def _update_view(self, views, view_index: int): if not self.maxvar: _check_converged_weights(self.weights[view_index], view_index) self.weights[view_index] = self.weights[view_index] / ( - np.linalg.norm(views[view_index] @ self.weights[view_index]) - / np.sqrt(self.n) + np.linalg.norm(views[view_index] @ self.weights[view_index]) + / np.sqrt(self.n) ) self.scores[view_index] = views[view_index] @ self.weights[view_index] @@ -321,7 +321,7 @@ def _objective(self, views): if self.maxvar: target /= np.linalg.norm(target) / np.sqrt(self.n) objective = np.linalg.norm(views[i] @ self.weights[i] - target) ** 2 / ( - 2 * self.n + 2 * self.n ) l1_pen = l1[i] * np.linalg.norm(self.weights[i], ord=1) l2_pen = l2[i] * np.linalg.norm(self.weights[i], ord=2) diff --git a/cca_zoo/models/_iterative/_pddgcca.py b/cca_zoo/models/_iterative/_pddgcca.py index 141d5e38..ce3ed651 100644 --- a/cca_zoo/models/_iterative/_pddgcca.py +++ b/cca_zoo/models/_iterative/_pddgcca.py @@ -8,6 +8,7 @@ class PDD_GCCA(AltMaxVar): r""" + Fits a Primal Dual Decomposition Regularized CCA model to two or more views of data. .. 
math:: @@ -24,17 +25,17 @@ class PDD_GCCA(AltMaxVar): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - max_iter: int = 100, - initialization: Union[str, callable] = "pls", - tol: float = 1e-9, - view_regs=None, - verbose=0, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + max_iter: int = 100, + initialization: Union[str, callable] = "pls", + tol: float = 1e-9, + view_regs=None, + verbose=0, ): super().__init__( latent_dims=latent_dims, @@ -61,17 +62,17 @@ def _set_loop_params(self): class _PDD_GCCALoop(_BaseInnerLoop): def __init__( - self, - max_iter: int = 100, - tol=1e-9, - random_state=None, - view_regs=None, - alpha=1e-3, - eta=1e-3, - rho=1e-3, - c=0.9, - eps=1e-3, - verbose=0, + self, + max_iter: int = 100, + tol=1e-9, + random_state=None, + view_regs=None, + alpha=1e-3, + eta=1e-3, + rho=1e-3, + c=0.9, + eps=1e-3, + verbose=0, ): super().__init__( max_iter=max_iter, tol=tol, random_state=random_state, verbose=verbose @@ -103,9 +104,9 @@ def _update_view(self, views, view_index: int): targets = np.ma.array(self.scores, mask=False) targets.mask[view_index] = True target = ( - targets.sum(axis=0).filled() - + self.G[view_index] - - self.Y[view_index] / self.rho + targets.sum(axis=0).filled() + + self.G[view_index] + - self.Y[view_index] / self.rho ) weights_ = self.view_regs[view_index]( (self.n_views + self.rho) * views[view_index], @@ -120,11 +121,11 @@ def _update_view(self, views, view_index: int): ) G_ = U @ Vt if ( - max( - np.linalg.norm(weights_ - self.weights[view_index], ord=np.inf), - np.linalg.norm(G_ - self.G[view_index], ord=np.inf), - ) - < self.eps + max( + np.linalg.norm(weights_ - self.weights[view_index], ord=np.inf), + np.linalg.norm(G_ - self.G[view_index], ord=np.inf), + ) + < self.eps ): converged = True self.weights[view_index] = weights_ @@ -135,11 +136,11 @@ def _objective(self, views): 
total_objective = 0 for i, _ in enumerate(views): objective = ( - np.linalg.norm( - views[i] @ self.weights[i] - self.scores, ord="fro", axis=(1, 2) - ) - ** 2 - ).sum() / 2 + np.linalg.norm( + views[i] @ self.weights[i] - self.scores, ord="fro", axis=(1, 2) + ) + ** 2 + ).sum() / 2 total_objective += objective + self.view_regs[i].cost( views[i], self.weights[i] ) diff --git a/cca_zoo/models/_iterative/_pls_als.py b/cca_zoo/models/_iterative/_pls_als.py index b0204efc..a654d9c6 100644 --- a/cca_zoo/models/_iterative/_pls_als.py +++ b/cca_zoo/models/_iterative/_pls_als.py @@ -8,7 +8,7 @@ class PLS_ALS(_BaseIterative): r""" - A class used to fit a PLS model + A class used to fit a PLS model to two or more views of data. Fits a partial least squares model with CCA deflation by NIPALS algorithm @@ -20,6 +20,8 @@ class PLS_ALS(_BaseIterative): w_i^Tw_i=1 + Can also be used with more than two views + Parameters ---------- latent_dims : int, optional @@ -55,16 +57,16 @@ class PLS_ALS(_BaseIterative): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - max_iter: int = 100, - initialization: Union[str, callable] = "random", - tol: float = 1e-9, - verbose=0, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + max_iter: int = 100, + initialization: Union[str, callable] = "random", + tol: float = 1e-9, + verbose=0, ): super().__init__( latent_dims=latent_dims, @@ -90,11 +92,11 @@ def _set_loop_params(self): class _PLSInnerLoop(_BaseInnerLoop): def __init__( - self, - max_iter: int = 100, - tol=1e-9, - random_state=None, - verbose=0, + self, + max_iter: int = 100, + tol=1e-9, + random_state=None, + verbose=0, ): super().__init__( max_iter=max_iter, tol=tol, random_state=random_state, verbose=verbose diff --git a/cca_zoo/models/_iterative/_pmd.py b/cca_zoo/models/_iterative/_pmd.py index ea61e379..f5586f69 100644 --- 
a/cca_zoo/models/_iterative/_pmd.py +++ b/cca_zoo/models/_iterative/_pmd.py @@ -11,7 +11,7 @@ class SCCA_PMD(_BaseIterative): r""" - Fits a Sparse CCA (Penalized Matrix Decomposition) model. + Fits a Sparse CCA (Penalized Matrix Decomposition) model for 2 or more views. .. math:: @@ -70,19 +70,19 @@ class SCCA_PMD(_BaseIterative): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - deflation="cca", - c: Union[Iterable[float], float] = None, - max_iter: int = 100, - initialization: Union[str, callable] = "pls", - tol: float = 1e-9, - positive: Union[Iterable[bool], bool] = None, - verbose=0, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + deflation="cca", + c: Union[Iterable[float], float] = None, + max_iter: int = 100, + initialization: Union[str, callable] = "pls", + tol: float = 1e-9, + positive: Union[Iterable[bool], bool] = None, + verbose=0, ): self.c = c self.positive = positive @@ -127,13 +127,13 @@ def _check_params(self): class _PMDInnerLoop(_PLSInnerLoop): def __init__( - self, - max_iter: int = 100, - tol=1e-9, - c=None, - positive=None, - random_state=None, - verbose=0, + self, + max_iter: int = 100, + tol=1e-9, + c=None, + positive=None, + random_state=None, + verbose=0, ): super().__init__( max_iter=max_iter, tol=tol, random_state=random_state, verbose=verbose diff --git a/cca_zoo/models/_iterative/_scca_admm.py b/cca_zoo/models/_iterative/_scca_admm.py index 2f0dc9d0..64f906fb 100644 --- a/cca_zoo/models/_iterative/_scca_admm.py +++ b/cca_zoo/models/_iterative/_scca_admm.py @@ -9,7 +9,7 @@ class SCCA_ADMM(_BaseIterative): r""" - Fits a sparse CCA model by alternating ADMM + Fits a sparse CCA model by alternating ADMM for two or more views. .. 
math:: @@ -73,21 +73,21 @@ class SCCA_ADMM(_BaseIterative): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - deflation="cca", - c: Union[Iterable[float], float] = None, - mu: Union[Iterable[float], float] = None, - lam: Union[Iterable[float], float] = None, - eta: Union[Iterable[float], float] = None, - max_iter: int = 100, - initialization: Union[str, callable] = "pls", - tol: float = 1e-9, - verbose=0, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + deflation="cca", + c: Union[Iterable[float], float] = None, + mu: Union[Iterable[float], float] = None, + lam: Union[Iterable[float], float] = None, + eta: Union[Iterable[float], float] = None, + max_iter: int = 100, + initialization: Union[str, callable] = "pls", + tol: float = 1e-9, + verbose=0, ): self.c = c self.mu = mu @@ -126,15 +126,15 @@ def _check_params(self): class _ADMMInnerLoop(_ElasticInnerLoop): def __init__( - self, - max_iter: int = 100, - tol=1e-9, - mu=None, - lam=None, - c=None, - eta=None, - random_state=None, - verbose=0, + self, + max_iter: int = 100, + tol=1e-9, + mu=None, + lam=None, + c=None, + eta=None, + random_state=None, + verbose=0, ): super().__init__( max_iter=max_iter, tol=tol, random_state=random_state, verbose=verbose @@ -179,9 +179,9 @@ def _update_view(self, views, view_index: int): / lam * views[view_index].T @ ( - views[view_index] @ self.weights[view_index] - - self.z[view_index] - + self.eta[view_index] + views[view_index] @ self.weights[view_index] + - self.z[view_index] + + self.eta[view_index] ), mu, gradient, @@ -196,9 +196,9 @@ def _update_view(self, views, view_index: int): views[view_index] @ self.weights[view_index] + self.eta[view_index] ) self.eta[view_index] = ( - self.eta[view_index] - + views[view_index] @ self.weights[view_index] - - self.z[view_index] + self.eta[view_index] + + views[view_index] @ self.weights[view_index] + 
- self.z[view_index] ) norm_eta.append(np.linalg.norm(self.eta[view_index])) norm_proj.append( diff --git a/cca_zoo/models/_iterative/_scca_parkhomenko.py b/cca_zoo/models/_iterative/_scca_parkhomenko.py index 7363e259..16a78ab1 100644 --- a/cca_zoo/models/_iterative/_scca_parkhomenko.py +++ b/cca_zoo/models/_iterative/_scca_parkhomenko.py @@ -10,7 +10,7 @@ class SCCA_Parkhomenko(_BaseIterative): r""" - Fits a sparse CCA (penalized CCA) model + Fits a sparse CCA (penalized CCA) model for 2 or more views. .. math:: @@ -37,18 +37,18 @@ class SCCA_Parkhomenko(_BaseIterative): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - deflation="cca", - c: Union[Iterable[float], float] = None, - max_iter: int = 100, - initialization: Union[str, callable] = "pls", - tol: float = 1e-9, - verbose=0, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + deflation="cca", + c: Union[Iterable[float], float] = None, + max_iter: int = 100, + initialization: Union[str, callable] = "pls", + tol: float = 1e-9, + verbose=0, ): self.c = c super().__init__( @@ -81,12 +81,12 @@ def _check_params(self): class _ParkhomenkoInnerLoop(_PLSInnerLoop): def __init__( - self, - max_iter: int = 100, - tol=1e-9, - c=None, - random_state=None, - verbose=0, + self, + max_iter: int = 100, + tol=1e-9, + c=None, + random_state=None, + verbose=0, ): super().__init__( max_iter=max_iter, tol=tol, random_state=random_state, verbose=verbose diff --git a/cca_zoo/models/_iterative/_spancca.py b/cca_zoo/models/_iterative/_spancca.py index 2d609698..d345141e 100644 --- a/cca_zoo/models/_iterative/_spancca.py +++ b/cca_zoo/models/_iterative/_spancca.py @@ -38,21 +38,21 @@ class SCCA_Span(_BaseIterative): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - max_iter: int = 100, - initialization: str = "uniform", - tol: float = 1e-9, - 
regularisation="l0", - c: Union[Iterable[Union[float, int]], Union[float, int]] = None, - rank=1, - positive: Union[Iterable[bool], bool] = None, - random_state=None, - deflation="cca", - verbose=0, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + max_iter: int = 100, + initialization: str = "uniform", + tol: float = 1e-9, + regularisation="l0", + c: Union[Iterable[Union[float, int]], Union[float, int]] = None, + rank=1, + positive: Union[Iterable[bool], bool] = None, + random_state=None, + deflation="cca", + verbose=0, ): super().__init__( latent_dims=latent_dims, @@ -101,15 +101,15 @@ def _check_params(self): class _SpanCCAInnerLoop(_BaseInnerLoop): def __init__( - self, - update, - max_iter: int = 100, - tol=1e-9, - c=None, - rank=1, - random_state=None, - positive=False, - verbose=0, + self, + update, + max_iter: int = 100, + tol=1e-9, + c=None, + rank=1, + random_state=None, + positive=False, + verbose=0, ): super().__init__( max_iter=max_iter, tol=tol, random_state=random_state, verbose=verbose diff --git a/cca_zoo/models/_iterative/_swcca.py b/cca_zoo/models/_iterative/_swcca.py index 2bceffba..a26d1373 100644 --- a/cca_zoo/models/_iterative/_swcca.py +++ b/cca_zoo/models/_iterative/_swcca.py @@ -31,20 +31,20 @@ class SWCCA(_BaseIterative): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - max_iter: int = 500, - initialization: str = "random", - tol: float = 1e-9, - regularisation="l0", - c: Union[Iterable[Union[float, int]], Union[float, int]] = None, - sample_support=None, - positive=False, - verbose=0, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + max_iter: int = 500, + initialization: str = "random", + tol: float = 1e-9, + regularisation="l0", + c: Union[Iterable[Union[float, int]], Union[float, int]] = None, + sample_support=None, + positive=False, + verbose=0, ): self.c = c 
self.sample_support = sample_support @@ -85,15 +85,15 @@ def _check_params(self): class _SWCCAInnerLoop(_PLSInnerLoop): def __init__( - self, - max_iter: int = 100, - tol=1e-9, - regularisation="l0", - c=None, - sample_support: int = None, - random_state=None, - positive=False, - verbose=0, + self, + max_iter: int = 100, + tol=1e-9, + regularisation="l0", + c=None, + sample_support: int = None, + random_state=None, + positive=False, + verbose=0, ): super().__init__( max_iter=max_iter, tol=tol, random_state=random_state, verbose=verbose @@ -114,8 +114,8 @@ def _update_view(self, views, view_index: int): targets = np.ma.array(self.scores, mask=False) targets.mask[view_index] = True self.weights[view_index] = ( - views[view_index] * self.sample_weights[:, np.newaxis] - ).T @ targets.sum(axis=0).filled() + views[view_index] * self.sample_weights[:, np.newaxis] + ).T @ targets.sum(axis=0).filled() self.weights[view_index] = self.update( self.weights[view_index], self.c[view_index], diff --git a/cca_zoo/models/_multiview/_gcca.py b/cca_zoo/models/_multiview/_gcca.py index acb47d52..8989cd61 100644 --- a/cca_zoo/models/_multiview/_gcca.py +++ b/cca_zoo/models/_multiview/_gcca.py @@ -58,15 +58,15 @@ class GCCA(rCCA): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - c: Union[Iterable[float], float] = None, - view_weights: Iterable[float] = None, - eps=1e-9, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + c: Union[Iterable[float], float] = None, + view_weights: Iterable[float] = None, + eps=1e-9, ): super().__init__( latent_dims=latent_dims, @@ -92,15 +92,15 @@ def _setup_evp(self, views: Iterable[np.ndarray], K=None): K = np.ones((len(views), views[0].shape[0])) Q = [] for i, (view, view_weight) in enumerate(zip(views, self.view_weights)): - view_cov = (1 - self.c[i]) * np.cov(view, rowvar=False) + self.c[i] * np.eye( - view.shape[1] - 
) + view_cov = (1 - self.c[i]) * np.cov(view, rowvar=False) + self.c[ + i + ] * np.eye(view.shape[1]) Q.append(view_weight * view @ np.linalg.inv(view_cov) @ view.T) Q = np.sum(Q, axis=0) Q = ( - np.diag(np.sqrt(np.sum(K, axis=0))) - @ Q - @ np.diag(np.sqrt(np.sum(K, axis=0))) + np.diag(np.sqrt(np.sum(K, axis=0))) + @ Q + @ np.diag(np.sqrt(np.sum(K, axis=0))) ) return Q, None @@ -140,19 +140,19 @@ class KGCCA(GCCA): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - c: Union[Iterable[float], float] = None, - eps=1e-3, - kernel: Iterable[Union[float, callable]] = None, - gamma: Iterable[float] = None, - degree: Iterable[float] = None, - coef0: Iterable[float] = None, - kernel_params: Iterable[dict] = None, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + c: Union[Iterable[float], float] = None, + eps=1e-3, + kernel: Iterable[Union[float, callable]] = None, + gamma: Iterable[float] = None, + degree: Iterable[float] = None, + coef0: Iterable[float] = None, + kernel_params: Iterable[dict] = None, ): super().__init__( latent_dims=latent_dims, @@ -200,17 +200,17 @@ def _setup_evp(self, views: Iterable[np.ndarray], K=None): K = np.ones((len(views), views[0].shape[0])) Q = [] for i, (view, view_weight) in enumerate(zip(kernels, self.view_weights)): - view_cov = (1 - self.c[i]) * np.cov(view, rowvar=False) + self.c[i] * np.eye( - view.shape[1] - ) + view_cov = (1 - self.c[i]) * np.cov(view, rowvar=False) + self.c[ + i + ] * np.eye(view.shape[1]) smallest_eig = min(0, np.linalg.eigvalsh(view_cov).min()) - self.eps view_cov = view_cov - smallest_eig * np.eye(view_cov.shape[0]) Q.append(view_weight * view @ np.linalg.inv(view_cov) @ view.T) Q = np.sum(Q, axis=0) Q = ( - np.diag(np.sqrt(np.sum(K, axis=0))) - @ Q - @ np.diag(np.sqrt(np.sum(K, axis=0))) + np.diag(np.sqrt(np.sum(K, axis=0))) + @ Q + @ np.diag(np.sqrt(np.sum(K, axis=0))) ) 
self.splits = np.cumsum([0] + [kernel.shape[1] for kernel in kernels]) return Q, None @@ -233,5 +233,6 @@ def transform(self, views: np.ndarray, y=None, **kwargs): def _weights(self, eigvals, eigvecs, views): kernels = [self._get_kernel(i, view) for i, view in enumerate(self.train_views)] self.weights = [ - np.linalg.pinv(kernel) @ eigvecs[:, : self.latent_dims] for kernel in kernels + np.linalg.pinv(kernel) @ eigvecs[:, : self.latent_dims] + for kernel in kernels ] diff --git a/cca_zoo/models/_multiview/_mcca.py b/cca_zoo/models/_multiview/_mcca.py index 6effcc8a..724d0aa9 100644 --- a/cca_zoo/models/_multiview/_mcca.py +++ b/cca_zoo/models/_multiview/_mcca.py @@ -57,14 +57,14 @@ class MCCA(rCCA): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - c: Union[Iterable[float], float] = None, - eps=1e-9, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + c: Union[Iterable[float], float] = None, + eps=1e-9, ): super().__init__( latent_dims=latent_dims, @@ -78,7 +78,10 @@ def __init__( self.eps = eps def _weights(self, eigvals, eigvecs, views): - self.weights = [eigvecs[split:self.splits[i + 1]] for i, split in enumerate(self.splits[:-1])] + self.weights = [ + eigvecs[split : self.splits[i + 1]] + for i, split in enumerate(self.splits[:-1]) + ] def _setup_evp(self, views: Iterable[np.ndarray], **kwargs): all_views = np.hstack(views) @@ -86,7 +89,8 @@ def _setup_evp(self, views: Iterable[np.ndarray], **kwargs): # Can regularise by adding to diagonal D = block_diag( *[ - (1 - self.c[i]) * np.cov(view, rowvar=False) + self.c[i] * np.eye(view.shape[1]) + (1 - self.c[i]) * np.cov(view, rowvar=False) + + self.c[i] * np.eye(view.shape[1]) for i, view in enumerate(views) ] ) @@ -98,7 +102,7 @@ def _setup_evp(self, views: Iterable[np.ndarray], **kwargs): def _get_weights(self, eigvals, eigvecs, views): self.weights = [ - eigvecs[split: 
self.splits[i + 1]] + eigvecs[split : self.splits[i + 1]] for i, split in enumerate(self.splits[:-1]) ] @@ -129,19 +133,19 @@ class KCCA(MCCA): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - c: Union[Iterable[float], float] = None, - eps=1e-3, - kernel: Iterable[Union[float, callable]] = None, - gamma: Iterable[float] = None, - degree: Iterable[float] = None, - coef0: Iterable[float] = None, - kernel_params: Iterable[dict] = None, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + c: Union[Iterable[float], float] = None, + eps=1e-3, + kernel: Iterable[Union[float, callable]] = None, + gamma: Iterable[float] = None, + degree: Iterable[float] = None, + coef0: Iterable[float] = None, + kernel_params: Iterable[dict] = None, ): super().__init__( latent_dims=latent_dims, diff --git a/cca_zoo/models/_multiview/_tcca.py b/cca_zoo/models/_multiview/_tcca.py index 7ed19e9d..0dab3162 100644 --- a/cca_zoo/models/_multiview/_tcca.py +++ b/cca_zoo/models/_multiview/_tcca.py @@ -56,13 +56,13 @@ class TCCA(_BaseCCA): """ def __init__( - self, - latent_dims: int = 1, - scale=True, - centre=True, - copy_data=True, - random_state=None, - c: Union[Iterable[float], float] = None, + self, + latent_dims: int = 1, + scale=True, + centre=True, + copy_data=True, + random_state=None, + c: Union[Iterable[float], float] = None, ): super().__init__( latent_dims=latent_dims, @@ -184,19 +184,19 @@ class KTCCA(TCCA): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - eps=1e-3, - c: Union[Iterable[float], float] = None, - kernel: Iterable[Union[float, callable]] = None, - gamma: Iterable[float] = None, - degree: Iterable[float] = None, - coef0: Iterable[float] = None, - kernel_params: Iterable[dict] = None, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + 
copy_data=True, + random_state=None, + eps=1e-3, + c: Union[Iterable[float], float] = None, + kernel: Iterable[Union[float, callable]] = None, + gamma: Iterable[float] = None, + degree: Iterable[float] = None, + coef0: Iterable[float] = None, + kernel_params: Iterable[dict] = None, ): super().__init__( latent_dims=latent_dims, @@ -249,8 +249,7 @@ def _setup_tensor(self, *views: np.ndarray): ] covs_invsqrt = [np.linalg.inv(sqrtm(cov)).real for cov in covs] kernels = [ - kernel @ cov_invsqrt - for kernel, cov_invsqrt in zip(kernels, covs_invsqrt) + kernel @ cov_invsqrt for kernel, cov_invsqrt in zip(kernels, covs_invsqrt) ] return kernels, covs_invsqrt diff --git a/cca_zoo/models/_ncca.py b/cca_zoo/models/_ncca.py index f6b5fee4..a4442529 100644 --- a/cca_zoo/models/_ncca.py +++ b/cca_zoo/models/_ncca.py @@ -28,15 +28,15 @@ class NCCA(_BaseCCA): """ def __init__( - self, - latent_dims: int = 1, - scale=True, - centre=True, - copy_data=True, - accept_sparse=False, - random_state: Union[int, np.random.RandomState] = None, - nearest_neighbors=None, - gamma: Iterable[float] = None, + self, + latent_dims: int = 1, + scale=True, + centre=True, + copy_data=True, + accept_sparse=False, + random_state: Union[int, np.random.RandomState] = None, + nearest_neighbors=None, + gamma: Iterable[float] = None, ): super().__init__( latent_dims, scale, centre, copy_data, accept_sparse, random_state @@ -71,9 +71,9 @@ def fit(self, views: Iterable[np.ndarray], y=None, **kwargs): ] S = self.Ws[0] @ self.Ws[1] U, S, Vt = np.linalg.svd(S) - self.f = U[:, 1: self.latent_dims + 1] * np.sqrt(self.n) - self.g = Vt[1: self.latent_dims + 1, :].T * np.sqrt(self.n) - self.S = S[1: self.latent_dims + 1] + self.f = U[:, 1 : self.latent_dims + 1] * np.sqrt(self.n) + self.g = Vt[1 : self.latent_dims + 1, :].T * np.sqrt(self.n) + self.S = S[1 : self.latent_dims + 1] return self def transform(self, views: Iterable[np.ndarray], **kwargs): diff --git a/cca_zoo/models/_partialcca.py 
b/cca_zoo/models/_partialcca.py index 04cbdaa9..4a2e4664 100644 --- a/cca_zoo/models/_partialcca.py +++ b/cca_zoo/models/_partialcca.py @@ -46,14 +46,14 @@ class PartialCCA(MCCA): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - c: Union[Iterable[float], float] = None, - eps=1e-3, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + c: Union[Iterable[float], float] = None, + eps=1e-3, ): super().__init__( latent_dims=latent_dims, @@ -96,7 +96,7 @@ def transform(self, views: Iterable[np.ndarray], partials=None, **kwargs): transformed_views = [] for i, (view) in enumerate(views): transformed_view = ( - view - partials @ self.confound_betas[i] - ) @ self.weights[i] + view - partials @ self.confound_betas[i] + ) @ self.weights[i] transformed_views.append(transformed_view) return transformed_views diff --git a/cca_zoo/models/_prcca.py b/cca_zoo/models/_prcca.py index 6a11c4e1..87d9c6cc 100644 --- a/cca_zoo/models/_prcca.py +++ b/cca_zoo/models/_prcca.py @@ -35,14 +35,14 @@ class PRCCA(MCCA): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - eps=1e-3, - c=0, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + eps=1e-3, + c=0, ): """ Parameters @@ -89,12 +89,8 @@ def _pretransform(self, views, idxs): self.p = [X_i.shape[1] for X_i in X_1] X_2 = [np.delete(view, idx, axis=1) for view, idx in zip(views, idxs)] self.B = [np.linalg.pinv(X_2) @ X_1 for X_1, X_2 in zip(X_1, X_2)] - X_1 = [ - X_1 - X_2 @ B - for X_1, X_2, B in zip(X_1, X_2, self.B)] - views = [ - np.hstack((X_1, X_2)) - for X_1, X_2 in zip(X_1, X_2)] + X_1 = [X_1 - X_2 @ B for X_1, X_2, B in zip(X_1, X_2, self.B)] + views = [np.hstack((X_1, X_2)) for X_1, X_2 in zip(X_1, X_2)] return views def _setup_evp(self, views: Iterable[np.ndarray], 
idxs=None, **kwargs): @@ -104,7 +100,10 @@ def _setup_evp(self, views: Iterable[np.ndarray], idxs=None, **kwargs): for i, idx in enumerate(idxs): penalties[i][idx] = self.c[i] D = block_diag( - *[(1 - self.c[i]) * (m.T @ m) / self.n + np.diag(penalties[i]) for i, m in enumerate(views)] + *[ + (1 - self.c[i]) * (m.T @ m) / self.n + np.diag(penalties[i]) + for i, m in enumerate(views) + ] ) C -= block_diag(*[view.T @ view / self.n for view in views]) D_smallest_eig = min(0, np.linalg.eigvalsh(D).min()) - self.eps diff --git a/cca_zoo/models/_rcca.py b/cca_zoo/models/_rcca.py index bd64cffe..c9e789f0 100644 --- a/cca_zoo/models/_rcca.py +++ b/cca_zoo/models/_rcca.py @@ -59,15 +59,15 @@ class rCCA(_BaseCCA): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - c: Union[Iterable[float], float] = None, - eps=1e-3, - accept_sparse=None, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + c: Union[Iterable[float], float] = None, + eps=1e-3, + accept_sparse=None, ): if accept_sparse is None: accept_sparse = ["csc", "csr"] @@ -96,36 +96,46 @@ def fit(self, views: Iterable[np.ndarray], y=None, **kwargs): def _setup_evp(self, views: Iterable[np.ndarray], **kwargs): n = views[0].shape[0] self.principal_components = _pca_data(*views) - self.Bs = [(1 - self.c[i]) * pc.singular_values_ ** 2 / n + self.c[i] for i, pc in - enumerate(self.principal_components)] + self.Bs = [ + (1 - self.c[i]) * pc.singular_values_**2 / n + self.c[i] + for i, pc in enumerate(self.principal_components) + ] C, D = self._two_view_evp(views) return C, D def _weights(self, eigvals, eigvecs, views): R, B = self._get_R_B(views) R_12 = R[0].T @ R[1] - w_y = self.principal_components[1].components_.T @ np.diag(1 / np.sqrt(B[1])) @ eigvecs + w_y = ( + self.principal_components[1].components_.T + @ np.diag(1 / np.sqrt(B[1])) + @ eigvecs + ) w_x = ( - 
self.principal_components[0].components_.T - @ np.diag(1 / B[0]) - @ R_12 - @ np.diag(1 / np.sqrt(B[1])) - @ eigvecs - / np.sqrt(eigvals) + self.principal_components[0].components_.T + @ np.diag(1 / B[0]) + @ R_12 + @ np.diag(1 / np.sqrt(B[1])) + @ eigvecs + / np.sqrt(eigvals) ) self.weights = [w_x, w_y] def _get_R_B(self, views): n = views[0].shape[0] - B = [(1 - self.c[i]) * pc.singular_values_ ** 2 / n + self.c[i] for i, pc in - enumerate(self.principal_components)] + B = [ + (1 - self.c[i]) * pc.singular_values_**2 / n + self.c[i] + for i, pc in enumerate(self.principal_components) + ] R = [pc.transform(view) for view, pc in zip(views, self.principal_components)] return R, B def _solve_evp(self, C, D=None): p = C.shape[0] [eigvals, eigvecs] = eigh( - C, D, subset_by_index=[p - self.latent_dims, p - 1], + C, + D, + subset_by_index=[p - self.latent_dims, p - 1], ) idx = np.argsort(eigvals, axis=0)[::-1] eigvecs = eigvecs[:, idx].real @@ -135,11 +145,11 @@ def _two_view_evp(self, views): R, B = self._get_R_B(views) R_12 = R[0].T @ R[1] M = ( - np.diag(1 / np.sqrt(B[1])) - @ R_12.T - @ np.diag(1 / B[0]) - @ R_12 - @ np.diag(1 / np.sqrt(B[1])) + np.diag(1 / np.sqrt(B[1])) + @ R_12.T + @ np.diag(1 / B[0]) + @ R_12 + @ np.diag(1 / np.sqrt(B[1])) ) return M, None @@ -193,12 +203,12 @@ class CCA(rCCA): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, ): super().__init__( latent_dims=latent_dims, @@ -257,12 +267,12 @@ class PLS(rCCA): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, ): super().__init__( latent_dims=latent_dims, diff --git a/cca_zoo/models/_stochastic/__init__.py 
b/cca_zoo/models/_stochastic/__init__.py index 8a20d5b5..30e3b87e 100644 --- a/cca_zoo/models/_stochastic/__init__.py +++ b/cca_zoo/models/_stochastic/__init__.py @@ -3,6 +3,13 @@ from ._incrementalpls import IncrementalPLS from ._stochasticpls import PLSStochasticPower -__all__ = ["IncrementalPLS", "PLSStochasticPower", - "PLSGHAGEP", "CCAGHAGEP", "RCCAGHAGEP", - "PLSEigenGame", "CCAEigenGame", "RCCAEigenGame"] +__all__ = [ + "IncrementalPLS", + "PLSStochasticPower", + "PLSGHAGEP", + "CCAGHAGEP", + "RCCAGHAGEP", + "PLSEigenGame", + "CCAEigenGame", + "RCCAEigenGame", +] diff --git a/cca_zoo/models/_stochastic/_base.py b/cca_zoo/models/_stochastic/_base.py index 33c0d546..aad4a015 100644 --- a/cca_zoo/models/_stochastic/_base.py +++ b/cca_zoo/models/_stochastic/_base.py @@ -6,33 +6,33 @@ from torch.utils import data from cca_zoo.data.deep import NumpyDataset -from cca_zoo.models import CCA, PLS +from cca_zoo.models import CCA from cca_zoo.models._base import _BaseCCA class _BaseStochastic(_BaseCCA): def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - eps=1e-3, - accept_sparse=None, - batch_size=1, - shuffle=True, - sampler=None, - batch_sampler=None, - num_workers=0, - pin_memory=False, - drop_last=True, - timeout=0, - worker_init_fn=None, - epochs=1, - val_split=None, - val_interval=10, - learning_rate=0.01, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + eps=1e-3, + accept_sparse=None, + batch_size=None, + shuffle=True, + sampler=None, + batch_sampler=None, + num_workers=0, + pin_memory=False, + drop_last=True, + timeout=0, + worker_init_fn=None, + epochs=1, + val_split=None, + val_interval=10, + learning_rate=0.01, ): if accept_sparse is None: accept_sparse = ["csc", "csr"] @@ -67,6 +67,8 @@ def fit(self, views: Iterable[np.ndarray], y=None, **kwargs): train_size = int((1 - self.val_split) * len(dataset)) val_size = len(dataset) - 
train_size dataset, val_dataset = data.random_split(dataset, [train_size, val_size]) + if self.batch_size is None: + self.batch_size = len(dataset) dataloader = data.DataLoader( dataset, batch_size=self.batch_size, @@ -86,10 +88,13 @@ def fit(self, views: Iterable[np.ndarray], y=None, **kwargs): ) self.track = [] self.weights = [ - self.random_state.normal(0, 1, size=(view.shape[1],self.latent_dims)) for view in views + self.random_state.normal(0, 1, size=(view.shape[1], self.latent_dims)) + for view in views ] # normalize weights - self.weights = [weight / np.linalg.norm(weight, axis=0) for weight in self.weights] + self.weights = [ + weight / np.linalg.norm(weight, axis=0) for weight in self.weights + ] for _ in range(self.epochs): for i, sample in enumerate(dataloader): self.update([view.numpy() for view in sample["views"]]) @@ -107,7 +112,7 @@ def objective(self, views, **kwargs): return self.tcc(views) def tv(self, views): - #q from qr decomposition of weights + # q from qr decomposition of weights q = [np.linalg.qr(weight)[0] for weight in self.weights] views = self._centre_scale_transform(views) transformed_views = [] @@ -116,11 +121,11 @@ def tv(self, views): transformed_views.append(transformed_view) return tv(transformed_views) - def tcc(self, views): z = self.transform(views) return tcc(z) + def tv(z): all_z = np.hstack(z) C = np.cov(all_z, rowvar=False) @@ -128,5 +133,6 @@ def tv(z): C /= z[0].shape[0] return np.linalg.svd(C, compute_uv=False).sum() + def tcc(z): - return CCA(z[0].shape[1]).fit(z).score(z).sum() \ No newline at end of file + return CCA(z[0].shape[1]).fit(z).score(z).sum() diff --git a/cca_zoo/models/_stochastic/_eigengame.py b/cca_zoo/models/_stochastic/_eigengame.py index fb9fdc70..7973ddcc 100644 --- a/cca_zoo/models/_stochastic/_eigengame.py +++ b/cca_zoo/models/_stochastic/_eigengame.py @@ -5,7 +5,6 @@ class RCCAEigenGame(_BaseStochastic): - """ A class used to fit Regularized CCA by Delta-EigenGame @@ -54,26 +53,25 @@ class 
RCCAEigenGame(_BaseStochastic): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - accept_sparse=None, - batch_size=1, - shuffle=True, - sampler=None, - batch_sampler=None, - num_workers=0, - pin_memory=False, - drop_last=True, - timeout=0, - worker_init_fn=None, - epochs=1, - learning_rate=0.01, - c=0, - **kwargs + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + accept_sparse=None, + batch_size=None, + shuffle=True, + sampler=None, + batch_sampler=None, + num_workers=0, + pin_memory=False, + drop_last=True, + timeout=0, + worker_init_fn=None, + epochs=1, + learning_rate=0.01, + c=0, ): super().__init__( latent_dims=latent_dims, @@ -93,7 +91,6 @@ def __init__( worker_init_fn=worker_init_fn, epochs=epochs, learning_rate=learning_rate, - **kwargs ) self.c = c @@ -172,12 +169,47 @@ class CCAEigenGame(RCCAEigenGame): ---------- Chapman, James, Ana Lawry Aguila, and Lennie Wells. "A Generalized EigenGame with Extensions to Multiview Representation Learning." arXiv preprint arXiv:2211.11323 (2022). 
""" + def __init__( - self, - *args, **kwargs, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + accept_sparse=None, + batch_size=None, + shuffle=True, + sampler=None, + batch_sampler=None, + num_workers=0, + pin_memory=False, + drop_last=True, + timeout=0, + worker_init_fn=None, + epochs=1, + learning_rate=0.01, ): - kwargs.pop('c', None) - super().__init__(*args, c=0, **kwargs) + super().__init__( + latent_dims=latent_dims, + scale=scale, + centre=centre, + copy_data=copy_data, + accept_sparse=accept_sparse, + random_state=random_state, + batch_size=batch_size, + shuffle=shuffle, + sampler=sampler, + batch_sampler=batch_sampler, + num_workers=num_workers, + pin_memory=pin_memory, + drop_last=drop_last, + timeout=timeout, + worker_init_fn=worker_init_fn, + epochs=epochs, + learning_rate=learning_rate, + c=0, + ) class PLSEigenGame(RCCAEigenGame): @@ -225,12 +257,47 @@ class PLSEigenGame(RCCAEigenGame): ---------- Chapman, James, Ana Lawry Aguila, and Lennie Wells. "A Generalized EigenGame with Extensions to Multiview Representation Learning." arXiv preprint arXiv:2211.11323 (2022). 
""" + def __init__( - self, - *args, **kwargs, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + accept_sparse=None, + batch_size=None, + shuffle=True, + sampler=None, + batch_sampler=None, + num_workers=0, + pin_memory=False, + drop_last=True, + timeout=0, + worker_init_fn=None, + epochs=1, + learning_rate=0.01, ): - kwargs.pop('c', None) - super().__init__(*args, c=1, **kwargs) + super().__init__( + latent_dims=latent_dims, + scale=scale, + centre=centre, + copy_data=copy_data, + accept_sparse=accept_sparse, + random_state=random_state, + batch_size=batch_size, + shuffle=shuffle, + sampler=sampler, + batch_sampler=batch_sampler, + num_workers=num_workers, + pin_memory=pin_memory, + drop_last=drop_last, + timeout=timeout, + worker_init_fn=worker_init_fn, + epochs=epochs, + learning_rate=learning_rate, + c=1, + ) def objective(self, views, **kwargs): return self.tv(views) diff --git a/cca_zoo/models/_stochastic/_ghagep.py b/cca_zoo/models/_stochastic/_ghagep.py index c3862e48..9aba2e53 100644 --- a/cca_zoo/models/_stochastic/_ghagep.py +++ b/cca_zoo/models/_stochastic/_ghagep.py @@ -53,26 +53,25 @@ class RCCAGHAGEP(_BaseStochastic): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - accept_sparse=None, - batch_size=1, - shuffle=True, - sampler=None, - batch_sampler=None, - num_workers=0, - pin_memory=False, - drop_last=True, - timeout=0, - worker_init_fn=None, - epochs=1, - learning_rate=0.01, - c=0, - **kwargs + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + accept_sparse=None, + batch_size=1, + shuffle=True, + sampler=None, + batch_sampler=None, + num_workers=0, + pin_memory=False, + drop_last=True, + timeout=0, + worker_init_fn=None, + epochs=1, + learning_rate=0.01, + c=0, ): super().__init__( latent_dims=latent_dims, @@ -92,7 +91,6 @@ def __init__( 
worker_init_fn=worker_init_fn, epochs=epochs, learning_rate=learning_rate, - **kwargs ) self.c = c @@ -111,7 +109,7 @@ def update(self, views): Bw = self._Bw(view, projections[i].filled(), self.weights[i], self.c[i]) wAw = self.weights[i].T @ Aw wAw[np.diag_indices_from(wAw)] = np.where(np.diag(wAw) > 0, np.diag(wAw), 0) - grads = (Aw - Bw @ np.triu(wAw)) + grads = Aw - Bw @ np.triu(wAw) self.weights[i] += self.learning_rate * grads def _Aw(self, view, projections): @@ -171,11 +169,45 @@ class CCAGHAGEP(RCCAGHAGEP): """ def __init__( - self, - *args, **kwargs, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + accept_sparse=None, + batch_size=1, + shuffle=True, + sampler=None, + batch_sampler=None, + num_workers=0, + pin_memory=False, + drop_last=True, + timeout=0, + worker_init_fn=None, + epochs=1, + learning_rate=0.01, ): - kwargs.pop('c', None) - super().__init__(*args, c=0, **kwargs) + super().__init__( + latent_dims=latent_dims, + scale=scale, + centre=centre, + copy_data=copy_data, + accept_sparse=accept_sparse, + random_state=random_state, + batch_size=batch_size, + shuffle=shuffle, + sampler=sampler, + batch_sampler=batch_sampler, + num_workers=num_workers, + pin_memory=pin_memory, + drop_last=drop_last, + timeout=timeout, + worker_init_fn=worker_init_fn, + epochs=epochs, + learning_rate=learning_rate, + c=0, + ) class PLSGHAGEP(RCCAGHAGEP): @@ -225,11 +257,45 @@ class PLSGHAGEP(RCCAGHAGEP): """ def __init__( - self, - *args, **kwargs, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + accept_sparse=None, + batch_size=1, + shuffle=True, + sampler=None, + batch_sampler=None, + num_workers=0, + pin_memory=False, + drop_last=True, + timeout=0, + worker_init_fn=None, + epochs=1, + learning_rate=0.01, ): - kwargs.pop('c', None) - super().__init__(*args, c=1, **kwargs) + super().__init__( + latent_dims=latent_dims, + scale=scale, + centre=centre, 
+ copy_data=copy_data, + accept_sparse=accept_sparse, + random_state=random_state, + batch_size=batch_size, + shuffle=shuffle, + sampler=sampler, + batch_sampler=batch_sampler, + num_workers=num_workers, + pin_memory=pin_memory, + drop_last=drop_last, + timeout=timeout, + worker_init_fn=worker_init_fn, + epochs=epochs, + learning_rate=learning_rate, + c=1, + ) def objective(self, views, **kwargs): return self.tv(views) diff --git a/cca_zoo/models/_stochastic/_incrementalpls.py b/cca_zoo/models/_stochastic/_incrementalpls.py index 88b48566..9e1d2f38 100644 --- a/cca_zoo/models/_stochastic/_incrementalpls.py +++ b/cca_zoo/models/_stochastic/_incrementalpls.py @@ -50,24 +50,24 @@ class IncrementalPLS(_BaseStochastic): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - accept_sparse=None, - batch_size=1, - shuffle=True, - sampler=None, - batch_sampler=None, - num_workers=0, - pin_memory=False, - drop_last=True, - timeout=0, - worker_init_fn=None, - epochs=1, - simple=False, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + accept_sparse=None, + batch_size=1, + shuffle=True, + sampler=None, + batch_sampler=None, + num_workers=0, + pin_memory=False, + drop_last=True, + timeout=0, + worker_init_fn=None, + epochs=1, + simple=False, ): super().__init__( latent_dims=latent_dims, @@ -114,8 +114,8 @@ def simple_update(self, views): if not hasattr(self, "M"): self.M = np.zeros((views[0].shape[1], views[1].shape[1])) self.M = ( - views[0].T @ views[1] - + self.weights[0] @ np.diag(self.S) @ self.weights[1].T + views[0].T @ views[1] + + self.weights[0] @ np.diag(self.S) @ self.weights[1].T ) U, S, Vt = np.linalg.svd(self.M) self.weights[0] = U[:, : self.latent_dims] @@ -144,12 +144,12 @@ def incrsvd(self, hats, orths): ) U, S, Vt = np.linalg.svd(Q) self.weights[0] = ( - np.hstack((self.weights[0], orths[0].T / np.linalg.norm(orths[0]))) - @ U[:, 
: self.latent_dims] + np.hstack((self.weights[0], orths[0].T / np.linalg.norm(orths[0]))) + @ U[:, : self.latent_dims] ) self.weights[1] = ( - np.hstack((self.weights[1], orths[1].T / np.linalg.norm(orths[1]))) - @ Vt.T[:, : self.latent_dims] + np.hstack((self.weights[1], orths[1].T / np.linalg.norm(orths[1]))) + @ Vt.T[:, : self.latent_dims] ) self.S = S[: self.latent_dims] @@ -157,7 +157,7 @@ def objective(self, views, **kwargs): return np.sum( np.diag( np.cov(*self.transform(views), rowvar=False)[ - : self.latent_dims, self.latent_dims: + : self.latent_dims, self.latent_dims : ] ) ) diff --git a/cca_zoo/models/_stochastic/_stochasticpls.py b/cca_zoo/models/_stochastic/_stochasticpls.py index c725f456..4b65d0e8 100644 --- a/cca_zoo/models/_stochastic/_stochasticpls.py +++ b/cca_zoo/models/_stochastic/_stochasticpls.py @@ -51,24 +51,24 @@ class PLSStochasticPower(_BaseStochastic): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - accept_sparse=None, - batch_size=1, - shuffle=True, - sampler=None, - batch_sampler=None, - num_workers=0, - pin_memory=False, - drop_last=True, - timeout=0, - worker_init_fn=None, - epochs=1, - learning_rate=0.01, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + accept_sparse=None, + batch_size=1, + shuffle=True, + sampler=None, + batch_sampler=None, + num_workers=0, + pin_memory=False, + drop_last=True, + timeout=0, + worker_init_fn=None, + epochs=1, + learning_rate=0.01, ): super().__init__( latent_dims=latent_dims, @@ -98,16 +98,18 @@ def update(self, views): projections = np.ma.array(projections, mask=False, keep_mask=False) projections.mask[i] = True self.weights[i] += ( - self.learning_rate * (view.T @ projections.sum(axis=0).filled()) / view.shape[0] + self.learning_rate + * (view.T @ projections.sum(axis=0).filled()) + / view.shape[0] ) - #qr decomposition of weights for orthogonality + # qr 
decomposition of weights for orthogonality self.weights[i] = self._orth(self.weights[i]) @staticmethod def _orth(U): Qu, Ru = np.linalg.qr(U) Su = np.sign(np.sign(np.diag(Ru)) + 0.5) - return (Qu @ np.diag(Su)) + return Qu @ np.diag(Su) def objective(self, views, **kwargs): return self.tv(views) diff --git a/cca_zoo/plotting/plotting.py b/cca_zoo/plotting/plotting.py index 115862a8..80981be4 100644 --- a/cca_zoo/plotting/plotting.py +++ b/cca_zoo/plotting/plotting.py @@ -48,7 +48,7 @@ def cv_plot(cv_results_): param_pairs = list(itertools.product(unique_x, unique_y)) for pair in param_pairs: mask = (cv_results_[param_cols[-2]] == pair[0]) & ( - cv_results_[param_cols[-1]] == pair[1] + cv_results_[param_cols[-1]] == pair[1] ) sub_dfs.append(cv_results_.loc[mask].iloc[:, :-2]) sub_scores.append(cv_results_[mask].mean_test_score) @@ -86,9 +86,9 @@ def cv_plot(cv_results_): def pairplot_train_test( - train_scores: Union[Tuple[np.ndarray], List[np.ndarray]], - test_scores: Union[Tuple[np.ndarray], List[np.ndarray]] = None, - title="", + train_scores: Union[Tuple[np.ndarray], List[np.ndarray]], + test_scores: Union[Tuple[np.ndarray], List[np.ndarray]] = None, + title="", ): """ Makes a pair plot showing the projections of each view against each other for each dimensions. Coloured by train and test @@ -117,10 +117,10 @@ def pairplot_train_test( def pairplot_label( - scores: Union[Tuple[np.ndarray], List[np.ndarray]], - labels=None, - label_name=None, - title="", + scores: Union[Tuple[np.ndarray], List[np.ndarray]], + labels=None, + label_name=None, + title="", ): """ Makes a pair plot showing the projections of each view against each other for each dimensions. 
Coloured by categorical label @@ -144,7 +144,7 @@ def pairplot_label( def scatterplot_label( - scores: np.ndarray, labels=None, label_name=None, title="", ax=None + scores: np.ndarray, labels=None, label_name=None, title="", ax=None ): """ Makes a scatter plot showing projections coloured by categorical label @@ -161,14 +161,14 @@ def scatterplot_label( def tsne_label( - scores: np.ndarray, - labels=None, - label_name=None, - title="", - verbose=1, - perplexity=40, - n_iter=300, - ax=None, + scores: np.ndarray, + labels=None, + label_name=None, + title="", + verbose=1, + perplexity=40, + n_iter=300, + ax=None, ): """ Makes a tsne plot of the projections from one view with optional labels diff --git a/cca_zoo/probabilisticmodels/_probabilisticcca.py b/cca_zoo/probabilisticmodels/_probabilisticcca.py index 51729a09..1ee5bac3 100644 --- a/cca_zoo/probabilisticmodels/_probabilisticcca.py +++ b/cca_zoo/probabilisticmodels/_probabilisticcca.py @@ -37,12 +37,12 @@ class ProbabilisticCCA(_BaseCCA): """ def __init__( - self, - latent_dims: int = 1, - copy_data=True, - random_state: int = 0, - num_samples=100, - num_warmup=100, + self, + latent_dims: int = 1, + copy_data=True, + random_state: int = 0, + num_samples=100, + num_warmup=100, ): super().__init__( latent_dims=latent_dims, @@ -116,6 +116,6 @@ def _model(self, views: Iterable[np.ndarray]): obs=X_, ) for i, (X_, psi_, mu_, W_) in enumerate( - zip(views, psi, mu, self.weights_list) - ) + zip(views, psi, mu, self.weights_list) + ) ] diff --git a/cca_zoo/test/test_deepmodels.py b/cca_zoo/test/test_deepmodels.py index 4809a31d..47599750 100644 --- a/cca_zoo/test/test_deepmodels.py +++ b/cca_zoo/test/test_deepmodels.py @@ -4,20 +4,22 @@ from torch import manual_seed from torch.utils.data import random_split -from cca_zoo import DCCA_EigenGame from cca_zoo.data.deep import NumpyDataset, get_dataloaders, check_dataset from cca_zoo.deepmodels import ( DCCA, + DCCA_EigenGame, DCCAE, DVCCA, DCCA_NOI, DTCCA, SplitAE, 
BarlowTwins, - DCCA_SDL, objectives, + DCCA_SDL, + objectives, ) from cca_zoo.deepmodels import architectures from cca_zoo.models import CCA +from cca_zoo.models import MCCA manual_seed(0) rng = check_random_state(0) @@ -26,7 +28,7 @@ Z = rng.rand(256, 14) X_conv = rng.rand(256, 1, 16, 16) Y_conv = rng.rand(256, 1, 16, 16) -dataset = NumpyDataset([X, Y, Z]) +dataset = NumpyDataset([X, Y, Z], scale=True, centre=True) check_dataset(dataset) train_dataset, val_dataset = random_split(dataset, [200, 56]) loader = get_dataloaders(dataset) @@ -36,25 +38,86 @@ train_ids = train_dataset.indices epochs = 100 + def test_numpy_dataset(): dataset = NumpyDataset([X, Y, Z]) check_dataset(dataset) get_dataloaders(dataset) + def test_linear(): - encoder_1 = architectures.LinearEncoder(latent_dims=1, feature_size=10) - encoder_2 = architectures.LinearEncoder(latent_dims=1, feature_size=12) - dcca = DCCA(latent_dims=1, encoders=[encoder_1, encoder_2], lr=1e-1) + latent_dims = 2 + cca = CCA(latent_dims=latent_dims).fit((X, Y)) + encoder_1 = architectures.LinearEncoder(latent_dims=latent_dims, feature_size=10) + encoder_2 = architectures.LinearEncoder(latent_dims=latent_dims, feature_size=12) + dcca = DCCA( + latent_dims=latent_dims, + encoders=[encoder_1, encoder_2], + lr=1e-1, + objective=objectives.MCCA, + ) trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False, logger=False) trainer.fit(dcca, loader) - cca = CCA().fit((X, Y)) # check linear encoder with SGD matches vanilla linear CCA assert ( - np.testing.assert_array_almost_equal( - cca.score((X, Y)), dcca.score(loader), decimal=2 - ) - is None + np.testing.assert_array_almost_equal( + cca.score((X, Y)), dcca.score(loader), decimal=2 + ) + is None + ) + encoder_1 = architectures.LinearEncoder(latent_dims=latent_dims, feature_size=10) + encoder_2 = architectures.LinearEncoder(latent_dims=latent_dims, feature_size=12) + dcca = DCCA_EigenGame( + latent_dims=latent_dims, encoders=[encoder_1, encoder_2], lr=1e-1 + ) + 
trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False, logger=False) + trainer.fit(dcca, loader) + # check linear encoder with EG matches vanilla linear CCA + assert ( + np.testing.assert_array_almost_equal( + cca.score((X, Y)), dcca.score(loader), decimal=2 + ) + is None + ) + + +def test_linear_mcca(): + latent_dims = 2 + cca = MCCA(latent_dims=latent_dims).fit((X, Y, Z)) + # DCCA_MCCA + encoder_1 = architectures.LinearEncoder(latent_dims=latent_dims, feature_size=10) + encoder_2 = architectures.LinearEncoder(latent_dims=latent_dims, feature_size=12) + encoder_3 = architectures.LinearEncoder(latent_dims=latent_dims, feature_size=14) + dmcca = DCCA( + latent_dims=latent_dims, + encoders=[encoder_1, encoder_2, encoder_3], + lr=1e-1, + objective=objectives.MCCA, + ) + trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False, logger=False) + trainer.fit(dmcca, loader) + assert ( + np.testing.assert_array_almost_equal( + cca.score((X, Y, Z)).sum(), dmcca.score(loader).sum(), decimal=2 + ) + is None + ) + # DCCA_EigenGame + encoder_1 = architectures.LinearEncoder(latent_dims=latent_dims, feature_size=10) + encoder_2 = architectures.LinearEncoder(latent_dims=latent_dims, feature_size=12) + encoder_3 = architectures.LinearEncoder(latent_dims=latent_dims, feature_size=14) + dmccaeg = DCCA_EigenGame( + latent_dims=latent_dims, encoders=[encoder_1, encoder_2, encoder_3], lr=1e-1 ) + trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False, logger=False) + trainer.fit(dmccaeg, loader) + assert ( + np.testing.assert_array_almost_equal( + cca.score((X, Y, Z)).sum(), dmccaeg.score(loader).sum(), decimal=2 + ) + is None + ) + def test_DCCA_methods(): N = len(train_dataset) @@ -71,10 +134,10 @@ def test_DCCA_methods(): trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False, logger=False) trainer.fit(dcca, train_loader, val_dataloaders=val_loader) assert ( - np.testing.assert_array_less( - cca.score((X, Y)).sum(), 
dcca.score(train_loader).sum() - ) - is None + np.testing.assert_array_less( + cca.score((X, Y)).sum(), dcca.score(train_loader).sum() + ) + is None ) # DCCA_EigenGame encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10) @@ -86,10 +149,10 @@ def test_DCCA_methods(): trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False, logger=False) trainer.fit(dcca_eg, train_loader, val_dataloaders=val_loader) assert ( - np.testing.assert_array_less( - cca.score((X, Y)).sum(), dcca_eg.score(train_loader).sum() - ) - is None + np.testing.assert_array_less( + cca.score((X, Y)).sum(), dcca_eg.score(train_loader).sum() + ) + is None ) # DCCA_NOI encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10) @@ -98,10 +161,10 @@ def test_DCCA_methods(): trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False, logger=False) trainer.fit(dcca_noi, train_loader) assert ( - np.testing.assert_array_less( - cca.score((X, Y)).sum(), dcca_noi.score(train_loader).sum() - ) - is None + np.testing.assert_array_less( + cca.score((X, Y)).sum(), dcca_noi.score(train_loader).sum() + ) + is None ) # Soft Decorrelation (_stochastic Decorrelation Loss) encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10) @@ -110,10 +173,10 @@ def test_DCCA_methods(): trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False, logger=False) trainer.fit(sdl, train_loader) assert ( - np.testing.assert_array_less( - cca.score((X, Y)).sum(), sdl.score(train_loader).sum() - ) - is None + np.testing.assert_array_less( + cca.score((X, Y)).sum(), sdl.score(train_loader).sum() + ) + is None ) # Barlow Twins encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10) @@ -125,10 +188,10 @@ def test_DCCA_methods(): trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False, logger=False) trainer.fit(barlowtwins, train_loader) assert ( - np.testing.assert_array_less( - cca.score((X, Y)).sum(), 
barlowtwins.score(train_loader).sum() - ) - is None + np.testing.assert_array_less( + cca.score((X, Y)).sum(), barlowtwins.score(train_loader).sum() + ) + is None ) # DGCCA encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10) @@ -141,10 +204,10 @@ def test_DCCA_methods(): trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False, logger=False) trainer.fit(dgcca, train_loader) assert ( - np.testing.assert_array_less( - cca.score((X, Y)).sum(), dgcca.score(train_loader).sum() - ) - is None + np.testing.assert_array_less( + cca.score((X, Y)).sum(), dgcca.score(train_loader).sum() + ) + is None ) # DMCCA encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10) @@ -157,10 +220,10 @@ def test_DCCA_methods(): trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False, logger=False) trainer.fit(dmcca, train_loader) assert ( - np.testing.assert_array_less( - cca.score((X, Y)).sum(), dmcca.score(train_loader).sum() - ) - is None + np.testing.assert_array_less( + cca.score((X, Y)).sum(), dmcca.score(train_loader).sum() + ) + is None ) @@ -175,13 +238,14 @@ def test_DTCCA_methods(): trainer.fit(dtcca, train_loader) z = dtcca.transform(train_loader) assert ( - np.testing.assert_array_almost_equal( - cca.fit((X[train_ids], Y[train_ids])).score((X[train_ids], Y[train_ids])) - .sum(), - cca.fit((z)).score((z)).sum(), - decimal=1, - ) - is None + np.testing.assert_array_almost_equal( + cca.fit((X[train_ids], Y[train_ids])) + .score((X[train_ids], Y[train_ids])) + .sum(), + cca.fit((z)).score((z)).sum(), + decimal=1, + ) + is None ) diff --git a/cca_zoo/test/test_models.py b/cca_zoo/test/test_models.py index 4ae60a97..01a93336 100644 --- a/cca_zoo/test/test_models.py +++ b/cca_zoo/test/test_models.py @@ -3,10 +3,9 @@ import scipy.sparse as sp from sklearn.utils.fixes import loguniform from sklearn.utils.validation import check_random_state - -from cca_zoo import cross_validate, permutation_test_score, learning_curve, PRCCA, 
GRCCA from cca_zoo.data.simulated import LinearSimulatedData -from cca_zoo.model_selection import GridSearchCV, RandomizedSearchCV +from cca_zoo.model_selection import GridSearchCV, RandomizedSearchCV, cross_validate, permutation_test_score, \ + learning_curve from cca_zoo.models import ( rCCA, CCA, @@ -26,7 +25,7 @@ PartialCCA, PLS_ALS, SCCA_ADMM, - SCCA_Parkhomenko, + SCCA_Parkhomenko, PRCCA, GRCCA, ) from cca_zoo.plotting import pairplot_train_test @@ -64,7 +63,7 @@ def test_unregularized_methods(): assert np.testing.assert_array_almost_equal(corr_cca, corr_kcca, decimal=1) is None assert np.testing.assert_array_almost_equal(corr_cca, corr_tcca, decimal=1) is None assert ( - np.testing.assert_array_almost_equal(corr_kgcca, corr_gcca, decimal=1) is None + np.testing.assert_array_almost_equal(corr_kgcca, corr_gcca, decimal=1) is None ) @@ -101,7 +100,7 @@ def test_regularized_methods(): # Check the correlations from each unregularized method are the same assert np.testing.assert_array_almost_equal(corr_pls, corr_mcca, decimal=1) is None assert ( - np.testing.assert_array_almost_equal(corr_pls, corr_kernel, decimal=1) is None + np.testing.assert_array_almost_equal(corr_pls, corr_kernel, decimal=1) is None ) assert np.testing.assert_array_almost_equal(corr_pls, corr_rcca, decimal=1) is None @@ -170,10 +169,10 @@ def test_weighted_GCCA_methods(): K[0, 200:] = 0 unobserved_gcca = GCCA(latent_dims=latent_dims, c=[c, c]).fit((X, Y), K=K) assert ( - np.testing.assert_array_almost_equal( - corr_unweighted_gcca, corr_deweighted_gcca, decimal=1 - ) - is None + np.testing.assert_array_almost_equal( + corr_unweighted_gcca, corr_deweighted_gcca, decimal=1 + ) + is None ) @@ -222,18 +221,28 @@ def test_partialcca(): def test_stochastic_pls(): pytest.importorskip("torch") - from cca_zoo.models import PLSGHAGEP, PLSEigenGame, PLSStochasticPower, IncrementalPLS + from cca_zoo.models import ( + PLSGHAGEP, + PLSEigenGame, + PLSStochasticPower, + IncrementalPLS, + ) from torch import 
manual_seed + manual_seed(42) pls = PLS(latent_dims=3).fit((X, Y)) - ipls = IncrementalPLS(latent_dims=3, epochs=150, simple=False, batch_size=10, random_state=1).fit( - (X, Y) - ) - spls = PLSStochasticPower(latent_dims=3, epochs=150, batch_size=10, learning_rate=1e-2, random_state=1).fit( - (X, Y) - ) - egpls = PLSEigenGame(latent_dims=3, epochs=150, batch_size=10, learning_rate=1e-2, random_state=1).fit((X, Y)) - ghapls = PLSGHAGEP(latent_dims=3, epochs=150, batch_size=10, learning_rate=1e-2, random_state=1).fit((X, Y)) + ipls = IncrementalPLS( + latent_dims=3, epochs=150, simple=False, batch_size=10, random_state=1 + ).fit((X, Y)) + spls = PLSStochasticPower( + latent_dims=3, epochs=150, batch_size=10, learning_rate=1e-2, random_state=1 + ).fit((X, Y)) + egpls = PLSEigenGame( + latent_dims=3, epochs=150, batch_size=10, learning_rate=1e-2, random_state=1 + ).fit((X, Y)) + ghapls = PLSGHAGEP( + latent_dims=3, epochs=150, batch_size=10, learning_rate=1e-2, random_state=1 + ).fit((X, Y)) pls_score = pls.score((X, Y)) ipls_score = ipls.score((X, Y)) spls_score = spls.score((X, Y)) @@ -249,9 +258,14 @@ def test_stochastic_pls(): def test_stochastic_cca(): pytest.importorskip("torch") from cca_zoo.models import CCAGHAGEP, CCAEigenGame + cca = CCA(latent_dims=1).fit((X, Y)) - egcca = CCAEigenGame(latent_dims=1, epochs=500, batch_size=10, learning_rate=5e-2).fit((X, Y)) - ghacca = CCAGHAGEP(latent_dims=1, epochs=500, batch_size=10, learning_rate=5e-2).fit((X, Y)) + egcca = CCAEigenGame( + latent_dims=1, epochs=500, batch_size=10, learning_rate=5e-2 + ).fit((X, Y)) + ghacca = CCAGHAGEP( + latent_dims=1, epochs=500, batch_size=10, learning_rate=5e-2 + ).fit((X, Y)) cca_score = cca.score((X, Y)) egcca_score = egcca.score((X, Y)) ghacca_score = ghacca.score((X, Y)) @@ -277,12 +291,26 @@ def test_plotting(): def test_PRCCA(): # Test that PRCCA works - prcca = PRCCA(latent_dims=2, c=[0, 0]).fit((X, Y), idxs=(np.arange(10), np.arange(11))) + prcca = PRCCA(latent_dims=2, c=[0, 
0]).fit( + (X, Y), idxs=(np.arange(10), np.arange(11)) + ) cca = CCA(latent_dims=2).fit([X, Y]) - assert np.testing.assert_array_almost_equal(cca.score((X, Y)), prcca.score((X, Y)), decimal=1) is None - prcca = PRCCA(latent_dims=2, c=[1, 1]).fit((X, Y), idxs=(np.arange(10), np.arange(11))) + assert ( + np.testing.assert_array_almost_equal( + cca.score((X, Y)), prcca.score((X, Y)), decimal=1 + ) + is None + ) + prcca = PRCCA(latent_dims=2, c=[1, 1]).fit( + (X, Y), idxs=(np.arange(10), np.arange(11)) + ) pls = PLS(latent_dims=2).fit([X, Y]) - assert np.testing.assert_array_almost_equal(pls.score((X, Y)), prcca.score((X, Y)), decimal=1) is None + assert ( + np.testing.assert_array_almost_equal( + pls.score((X, Y)), prcca.score((X, Y)), decimal=1 + ) + is None + ) def test_GRCCA(): @@ -296,10 +324,14 @@ def test_GRCCA(): feature_group_3[:3] = 1 feature_group_3[3:6] = 2 # Test that GRCCA works - grcca = GRCCA(latent_dims=1, c=[100, 0], mu=0).fit((X, Y), feature_groups=[feature_group_1, feature_group_2]) + grcca = GRCCA(latent_dims=1, c=[100, 0], mu=0).fit( + (X, Y), feature_groups=[feature_group_1, feature_group_2] + ) grcca.score((X, Y)) grcca.transform((X, Y)) - grcca = GRCCA(c=[100, 0, 50]).fit((X, Y, Z), feature_groups=[feature_group_1, feature_group_2, feature_group_3]) + grcca = GRCCA(c=[100, 0, 50]).fit( + (X, Y, Z), feature_groups=[feature_group_1, feature_group_2, feature_group_3] + ) def test_PCCA(): @@ -317,11 +349,11 @@ def test_PCCA(): ).fit([X, Y]) # Test that vanilla CCA and VCCA produce roughly similar latent space ie they are correlated assert ( - np.abs( - np.corrcoef( - cca.transform([X, Y])[1].T, - pcca.posterior_samples["z"].mean(axis=0)[:, 0], - )[0, 1] - ) - > 0.9 + np.abs( + np.corrcoef( + cca.transform([X, Y])[1].T, + pcca.posterior_samples["z"].mean(axis=0)[:, 0], + )[0, 1] + ) + > 0.9 ) diff --git a/examples/plot_dcca.py b/examples/plot_dcca.py index e97d8884..0f10aa13 100644 --- a/examples/plot_dcca.py +++ b/examples/plot_dcca.py @@ -9,6 +9,7 
@@ from matplotlib import pyplot as plt from cca_zoo import DCCA_EigenGame + # %% from cca_zoo.deepmodels import ( DCCA, @@ -41,7 +42,6 @@ max_epochs=EPOCHS, enable_checkpointing=False, log_every_n_steps=1, - ) trainer.fit(dcca, train_loader, val_loader) pairplot_label(dcca.transform(train_loader), train_labels, title="DCCA") @@ -55,7 +55,6 @@ max_epochs=EPOCHS, enable_checkpointing=False, log_every_n_steps=1, - ) trainer.fit(dcca_eg, train_loader, val_loader) pairplot_label(dcca_eg.transform(train_loader), train_labels, title="DCCA-EigenGame") @@ -69,7 +68,6 @@ max_epochs=EPOCHS, enable_checkpointing=False, log_every_n_steps=1, - ) trainer.fit(dcca_noi, train_loader, val_loader) pairplot_label( @@ -87,7 +85,6 @@ max_epochs=EPOCHS, enable_checkpointing=False, log_every_n_steps=1, - ) trainer.fit(dcca_sdl, train_loader, val_loader) pairplot_label( @@ -105,7 +102,6 @@ max_epochs=EPOCHS, enable_checkpointing=False, log_every_n_steps=1, - ) trainer.fit(barlowtwins, train_loader, val_loader) pairplot_label( diff --git a/examples/plot_dcca_custom_data.py b/examples/plot_dcca_custom_data.py index f2e44d7d..0b883e5f 100644 --- a/examples/plot_dcca_custom_data.py +++ b/examples/plot_dcca_custom_data.py @@ -6,6 +6,7 @@ """ import numpy as np + # %% # Imports # ----- diff --git a/examples/plot_dcca_multi.py b/examples/plot_dcca_multi.py index 6b38a8d7..5fe82417 100644 --- a/examples/plot_dcca_multi.py +++ b/examples/plot_dcca_multi.py @@ -9,9 +9,11 @@ from cca_zoo.deepmodels import ( DCCA, - DTCCA, objectives, + DTCCA, + objectives, ) from cca_zoo.deepmodels import architectures + # %% # Data # ----- diff --git a/examples/plot_validation.py b/examples/plot_validation.py index 1972c0e7..1873e5ab 100644 --- a/examples/plot_validation.py +++ b/examples/plot_validation.py @@ -61,14 +61,14 @@ def plot_learning_curve( - estimator, - title, - views, - axes=None, - ylim=None, - cv=None, - n_jobs=None, - train_sizes=np.linspace(0.1, 1.0, 5), + estimator, + title, + views, + axes=None, 
+ ylim=None, + cv=None, + n_jobs=None, + train_sizes=np.linspace(0.1, 1.0, 5), ): """ Generate 3 plots: the test and training learning curve, the training diff --git a/setup.py b/setup.py index 2c580480..c0b84eb1 100644 --- a/setup.py +++ b/setup.py @@ -6,8 +6,10 @@ with open("requirements.txt", "r") as f: REQUIRED_PACKAGES = f.read() -EXTRA_PACKAGES = {"deep": ["torch>=1.9.0", "torchvision", "pytorch-lightning"], - "probabilistic": ["jax", "numpyro", "arviz"]} +EXTRA_PACKAGES = { + "deep": ["torch>=1.9.0", "torchvision", "pytorch-lightning"], + "probabilistic": ["jax", "numpyro", "arviz"], +} EXTRA_PACKAGES["all"] = EXTRA_PACKAGES["deep"] + EXTRA_PACKAGES["probabilistic"] setup( @@ -33,7 +35,5 @@ ) - with open("requirements.txt", "r") as f: REQUIRED_PACKAGES = f.read() -