From 4344152712a5ecd239980b85f79ec9357ca8a7ed Mon Sep 17 00:00:00 2001 From: James Chapman <56592797+jameschapman19@users.noreply.github.com> Date: Mon, 15 Nov 2021 11:41:18 +0000 Subject: [PATCH 01/45] Create .nojekyll --- docs/.nojekyll | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/.nojekyll diff --git a/docs/.nojekyll b/docs/.nojekyll new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/docs/.nojekyll @@ -0,0 +1 @@ + From 82220752ed13677147b5378cac6e13c2d69c26f9 Mon Sep 17 00:00:00 2001 From: James Chapman <56592797+jameschapman19@users.noreply.github.com> Date: Mon, 15 Nov 2021 11:42:59 +0000 Subject: [PATCH 02/45] Create CNAME --- docs/CNAME | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/CNAME diff --git a/docs/CNAME b/docs/CNAME new file mode 100644 index 00000000..8162c10a --- /dev/null +++ b/docs/CNAME @@ -0,0 +1 @@ +www.ccazoo.com \ No newline at end of file From b1dae5240697e1930fe8776299a9078e1224c04c Mon Sep 17 00:00:00 2001 From: James Chapman <56592797+jameschapman19@users.noreply.github.com> Date: Mon, 15 Nov 2021 11:43:36 +0000 Subject: [PATCH 03/45] Delete CNAME --- docs/CNAME | 1 - 1 file changed, 1 deletion(-) delete mode 100644 docs/CNAME diff --git a/docs/CNAME b/docs/CNAME deleted file mode 100644 index 8162c10a..00000000 --- a/docs/CNAME +++ /dev/null @@ -1 +0,0 @@ -www.ccazoo.com \ No newline at end of file From e674f3b217db232fad645a05bbc541322826345d Mon Sep 17 00:00:00 2001 From: James Chapman <56592797+jameschapman19@users.noreply.github.com> Date: Mon, 15 Nov 2021 11:50:12 +0000 Subject: [PATCH 04/45] Delete .nojekyll --- docs/.nojekyll | 1 - 1 file changed, 1 deletion(-) delete mode 100644 docs/.nojekyll diff --git a/docs/.nojekyll b/docs/.nojekyll deleted file mode 100644 index 8b137891..00000000 --- a/docs/.nojekyll +++ /dev/null @@ -1 +0,0 @@ - From 9227fd71ff2f12448f4d6c71897062dbd2defe64 Mon Sep 17 00:00:00 2001 From: James Chapman <56592797+jameschapman19@users.noreply.github.com> Date: Mon, 15 Nov 2021 11:53:11 +0000 Subject: [PATCH 05/45] Create .readthedocs.yml --- .github/workflows/.readthedocs.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .github/workflows/.readthedocs.yml diff --git a/.github/workflows/.readthedocs.yml b/.github/workflows/.readthedocs.yml new file mode 100644 index 00000000..230cc246 --- /dev/null +++ b/.github/workflows/.readthedocs.yml @@ -0,0 +1,25 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/source/conf.py + +# Build documentation with MkDocs +#mkdocs: +# configuration: mkdocs.yml + +# Optionally build your docs in additional formats such as PDF +formats: + - pdf + +# Optionally set the version of Python and requirements required to build your docs +python: + version: 3.7 + install: + - method: pip + path: . 
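A note on the configuration introduced in PATCH 05/45: Read the Docs only picks up a .readthedocs.yml placed at the repository root, while .github/workflows/ is reserved by GitHub for Actions workflow files, so a Read the Docs config stored there is ignored by Read the Docs and is not a valid Actions workflow either. That is presumably why PATCH 06-07 try grafting an "on: [workflow_dispatch]" Actions trigger (not part of the Read the Docs config schema) onto the file, and PATCH 08 deletes it. A minimal sketch of the same configuration relocated to the repository root, assuming the docs/source/conf.py Sphinx layout used throughout this series:

    # .readthedocs.yml (repository root, not .github/workflows/)
    version: 2
    sphinx:
      configuration: docs/source/conf.py
    formats:
      - pdf
    python:
      version: 3.7
      install:
        - method: pip
          path: .
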
From 8656a831496e39eee2f4cc4b025a03176a03bb32 Mon Sep 17 00:00:00 2001 From: James Chapman <56592797+jameschapman19@users.noreply.github.com> Date: Mon, 15 Nov 2021 11:55:58 +0000 Subject: [PATCH 06/45] Update .readthedocs.yml --- .github/workflows/.readthedocs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/.readthedocs.yml b/.github/workflows/.readthedocs.yml index 230cc246..69f13514 100644 --- a/.github/workflows/.readthedocs.yml +++ b/.github/workflows/.readthedocs.yml @@ -1,6 +1,7 @@ # .readthedocs.yml # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details +on [workflow_dispatch] # Required version: 2 From 96c67f2bf4c57094bbb543b102234bc8f0aa07ab Mon Sep 17 00:00:00 2001 From: James Chapman <56592797+jameschapman19@users.noreply.github.com> Date: Mon, 15 Nov 2021 11:56:31 +0000 Subject: [PATCH 07/45] Update .readthedocs.yml --- .github/workflows/.readthedocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/.readthedocs.yml b/.github/workflows/.readthedocs.yml index 69f13514..da63b045 100644 --- a/.github/workflows/.readthedocs.yml +++ b/.github/workflows/.readthedocs.yml @@ -1,7 +1,7 @@ +on: [workflow_dispatch] # .readthedocs.yml # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details -on [workflow_dispatch] # Required version: 2 From ded8a9cea58cd9bf3952e3fc818a391940bc9d56 Mon Sep 17 00:00:00 2001 From: James Chapman <56592797+jameschapman19@users.noreply.github.com> Date: Mon, 15 Nov 2021 12:22:17 +0000 Subject: [PATCH 08/45] Delete .readthedocs.yml --- .github/workflows/.readthedocs.yml | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 .github/workflows/.readthedocs.yml diff --git a/.github/workflows/.readthedocs.yml b/.github/workflows/.readthedocs.yml deleted file mode 100644 index da63b045..00000000 --- a/.github/workflows/.readthedocs.yml +++ /dev/null @@ -1,26 +0,0 @@ -on: [workflow_dispatch] -# .readthedocs.yml -# Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -# Required -version: 2 - -# Build documentation in the docs/ directory with Sphinx -sphinx: - configuration: docs/source/conf.py - -# Build documentation with MkDocs -#mkdocs: -# configuration: mkdocs.yml - -# Optionally build your docs in additional formats such as PDF -formats: - - pdf - -# Optionally set the version of Python and requirements required to build your docs -python: - version: 3.7 - install: - - method: pip - path: . From 4f178b7b085f27809c90422ea86f855234f0c451 Mon Sep 17 00:00:00 2001 From: James Chapman <56592797+jameschapman19@users.noreply.github.com> Date: Mon, 15 Nov 2021 12:23:10 +0000 Subject: [PATCH 09/45] Create Makefile --- docs/Makefile | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 docs/Makefile diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..d0c3cbf1 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". 
+help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) From b27e2005c4e8d5a965dabac5eb99999ca17f7548 Mon Sep 17 00:00:00 2001 From: James Chapman <56592797+jameschapman19@users.noreply.github.com> Date: Mon, 15 Nov 2021 12:23:30 +0000 Subject: [PATCH 10/45] Create make.bat --- docs/make.bat | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 docs/make.bat diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..9534b018 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd From 26e599aa1107aef659fce5148f7fd527acd9ffbb Mon Sep 17 00:00:00 2001 From: James Chapman <56592797+jameschapman19@users.noreply.github.com> Date: Mon, 15 Nov 2021 12:24:28 +0000 Subject: [PATCH 11/45] Set theme jekyll-theme-cayman --- docs/_config.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/_config.yml diff --git a/docs/_config.yml b/docs/_config.yml new file mode 100644 index 00000000..c4192631 --- /dev/null +++ b/docs/_config.yml @@ -0,0 +1 @@ +theme: jekyll-theme-cayman \ No newline at end of file From 1ef1ffe1ff5b316ad5a0bc6a8021c9fcf3b272f4 Mon Sep 17 00:00:00 2001 From: James Chapman <56592797+jameschapman19@users.noreply.github.com> Date: Mon, 15 Nov 2021 12:29:28 +0000 Subject: [PATCH 12/45] Delete Makefile --- docs/Makefile | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 docs/Makefile diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index d0c3cbf1..00000000 --- a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
-%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) From 307d8f08a8b9e4d2f7102407e739ee2ddfaaadea Mon Sep 17 00:00:00 2001 From: James Chapman <56592797+jameschapman19@users.noreply.github.com> Date: Mon, 15 Nov 2021 12:29:35 +0000 Subject: [PATCH 13/45] Delete make.bat --- docs/make.bat | 35 ----------------------------------- 1 file changed, 35 deletions(-) delete mode 100644 docs/make.bat diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index 9534b018..00000000 --- a/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd From 64496a01d086487b54421ce311cb08cba3111e83 Mon Sep 17 00:00:00 2001 From: James Chapman <56592797+jameschapman19@users.noreply.github.com> Date: Mon, 15 Nov 2021 12:29:48 +0000 Subject: [PATCH 14/45] Delete _config.yml --- docs/_config.yml | 1 - 1 file changed, 1 deletion(-) delete mode 100644 docs/_config.yml diff --git a/docs/_config.yml b/docs/_config.yml deleted file mode 100644 index c4192631..00000000 --- a/docs/_config.yml +++ /dev/null @@ -1 +0,0 @@ -theme: jekyll-theme-cayman \ No newline at end of file From 9c9395e7ba12ed87199cc0c878e2894c984cdf64 Mon Sep 17 00:00:00 2001 From: James Chapman <56592797+jameschapman19@users.noreply.github.com> Date: Mon, 15 Nov 2021 12:54:06 +0000 Subject: [PATCH 15/45] Create builddocs.yml --- .github/workflows/builddocs.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .github/workflows/builddocs.yml diff --git a/.github/workflows/builddocs.yml b/.github/workflows/builddocs.yml new file mode 100644 index 00000000..ee2e333f --- /dev/null +++ b/.github/workflows/builddocs.yml @@ -0,0 +1,15 @@ +on: workflow_dispatch + # Build docs + - run: + name: make html + command: | + cd docs; + make html; + # Save the outputs + - store_artifacts: + path: docs/_build/html/ + destination: dev + - persist_to_workspace: + root: docs/_build + paths: + - html From 8c93763b50324a631518ae3c22b5395bf33dd590 Mon Sep 17 00:00:00 2001 From: James Chapman <56592797+jameschapman19@users.noreply.github.com> Date: Mon, 15 Nov 2021 23:13:33 +0000 Subject: [PATCH 16/45] Delete builddocs.yml --- .github/workflows/builddocs.yml | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 .github/workflows/builddocs.yml diff --git a/.github/workflows/builddocs.yml b/.github/workflows/builddocs.yml deleted file mode 100644 index ee2e333f..00000000 --- a/.github/workflows/builddocs.yml +++ /dev/null @@ -1,15 +0,0 @@ -on: workflow_dispatch - # Build docs - - run: - name: make html - command: | - cd docs; - make html; - # Save the outputs - - store_artifacts: - path: docs/_build/html/ - destination: dev - - persist_to_workspace: - root: docs/_build - paths: - - html From 
d06e913583f419f8a3e6aef8fe4d3fed35c188d6 Mon Sep 17 00:00:00 2001 From: James Chapman <56592797+jameschapman19@users.noreply.github.com> Date: Mon, 15 Nov 2021 23:14:15 +0000 Subject: [PATCH 17/45] Update python-package.yml --- .github/workflows/python-package.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 1f766ea0..ad001a9e 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -13,6 +13,8 @@ jobs: build: runs-on: ubuntu-latest + env: + MODULE_NAME: cca_zoo strategy: matrix: python-version: [3.7, 3.8, 3.9] @@ -44,3 +46,6 @@ jobs: uses: codecov/codecov-action@v1 with: fail_ci_if_error: true + - name: Run doctests + run: | + pytest --doctest-modules --ignore=$MODULE_NAME/tests $MODULE_NAME From 90ed640c92976684fa82bc89ff21f068c270b3f5 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 12:39:13 +0000 Subject: [PATCH 18/45] Small doc changes to section levels --- docs/source/api/iterative.rst | 61 +++++++++++++++++++++++------------ docs/source/api/models.rst | 18 +++++------ 2 files changed, 49 insertions(+), 30 deletions(-) diff --git a/docs/source/api/iterative.rst b/docs/source/api/iterative.rst index e557d68e..1cb2557c 100644 --- a/docs/source/api/iterative.rst +++ b/docs/source/api/iterative.rst @@ -1,46 +1,65 @@ Normal CCA and PLS by alternating least squares --------------------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Quicker and more memory efficient for very large data + +CCA by Alternating Least Squares +"""""""""""""""""""""""""""""""""""" .. autoclass:: cca_zoo.models.CCA_ALS - :inherited-members: - :exclude-members: get_params, set_params + :inherited^members: + :exclude^members: get_params, set_params +PLS by Alternating Least Squares +"""""""""""""""""""""""""""""""""""" .. autoclass:: cca_zoo.models.PLS_ALS - :inherited-members: - :exclude-members: get_params, set_params + :inherited^members: + :exclude^members: get_params, set_params Sparsity Inducing Models --------------------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Penalized Matrix Decomposition (Sparse PLS) +""""""""""""""""""""""""""""""""""""""""""""""" .. autoclass:: cca_zoo.models.PMD - :inherited-members: - :exclude-members: get_params, set_params + :inherited^members: + :exclude^members: get_params, set_params +Sparse CCA by iterative lasso regression +""""""""""""""""""""""""""""""""""""""""""""""" .. autoclass:: cca_zoo.models.SCCA - :inherited-members: - :exclude-members: get_params, set_params + :inherited^members: + :exclude^members: get_params, set_params +Elastic CCA by MAXVAR +""""""""""""""""""""""""""""""""""""""""""""""" .. autoclass:: cca_zoo.models.ElasticCCA - :inherited-members: - :exclude-members: get_params, set_params + :inherited^members: + :exclude^members: get_params, set_params +Span CCA +""""""""""""""""""""""""""""""""""""""""""""""" .. autoclass:: cca_zoo.models.SpanCCA - :inherited-members: - :exclude-members: get_params, set_params + :inherited^members: + :exclude^members: get_params, set_params +Parkhomenko (penalized) CCA +""""""""""""""""""""""""""""""""""""""""" .. autoclass:: cca_zoo.models.ParkhomenkoCCA - :inherited-members: - :exclude-members: get_params, set_params + :inherited^members: + :exclude^members: get_params, set_params +Sparse CCA by ADMM +""""""""""""""""""""""""""""""""""""""""""" .. 
autoclass:: cca_zoo.models.SCCA_ADMM - :inherited-members: - :exclude-members: get_params, set_params + :inherited^members: + :exclude^members: get_params, set_params Miscellaneous --------------------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Sparse Weighted CCA +""""""""""""""""""""""""""""""""""""""""""" .. autoclass:: cca_zoo.models.SWCCA - :inherited-members: - :exclude-members: get_params, set_params \ No newline at end of file + :inherited^members: + :exclude^members: get_params, set_params \ No newline at end of file diff --git a/docs/source/api/models.rst b/docs/source/api/models.rst index 2cfeaddc..914d0ace 100644 --- a/docs/source/api/models.rst +++ b/docs/source/api/models.rst @@ -6,19 +6,19 @@ Regularized Canonical Correlation Analysis and Partial Least Squares ------------------------------------------------------------------------ Canonical Correlation Analysis -**************************************************** +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: cca_zoo.models.rcca.CCA :inherited-members: :exclude-members: get_params, set_params Partial Least Squares -**************************************************** +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: cca_zoo.models.rcca.PLS :inherited-members: :exclude-members: get_params, set_params Ridge Regularized Canonical Correlation Analysis -**************************************************** +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: cca_zoo.models.rcca.rCCA :inherited-members: :exclude-members: get_params, set_params @@ -27,13 +27,13 @@ GCCA and KGCCA --------------------------- Generalized (MAXVAR) Canonical Correlation Analysis -**************************************************** +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: cca_zoo.models.gcca.GCCA :inherited-members: :exclude-members: get_params, set_params Kernel Generalized (MAXVAR) Canonical Correlation Analysis -************************************************************ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: cca_zoo.models.gcca.KGCCA :inherited-members: :exclude-members: get_params, set_params @@ -42,13 +42,13 @@ MCCA and KCCA --------------------------- Multiset (SUMCOR) Canonical Correlation Analysis -************************************************** +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: cca_zoo.models.mcca.MCCA :inherited-members: :exclude-members: get_params, set_params Kernel Multiset (SUMCOR) Canonical Correlation Analysis -******************************************************** +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: cca_zoo.models.mcca.KCCA :inherited-members: :exclude-members: get_params, set_params @@ -57,13 +57,13 @@ Tensor Canonical Correlation Analysis ---------------------------------------- Tensor Canonical Correlation Analysis -************************************** +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: cca_zoo.models.tcca.TCCA :inherited-members: :exclude-members: get_params, set_params Kernel Tensor Canonical Correlation Analysis -********************************************** +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. 
autoclass:: cca_zoo.models.tcca.KTCCA :inherited-members: :exclude-members: get_params, set_params From 5e94ec47f6ceea4f6ff373fbd454817dcf93b0de Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 12:41:09 +0000 Subject: [PATCH 19/45] Small doc changes to section levels --- docs/source/api/models.rst | 2 +- docs/source/documentation/models.rst | 89 ---------------------------- 2 files changed, 1 insertion(+), 90 deletions(-) delete mode 100644 docs/source/documentation/models.rst diff --git a/docs/source/api/models.rst b/docs/source/api/models.rst index 914d0ace..fb4f64a9 100644 --- a/docs/source/api/models.rst +++ b/docs/source/api/models.rst @@ -72,7 +72,7 @@ More Complex Regularisation using Iterative Models ----------------------------------------------------- .. toctree:: - :maxdepth: 4 + :maxdepth: 6 iterative.rst Base Class diff --git a/docs/source/documentation/models.rst b/docs/source/documentation/models.rst deleted file mode 100644 index 9108d0fb..00000000 --- a/docs/source/documentation/models.rst +++ /dev/null @@ -1,89 +0,0 @@ -Models and sources -=================== - -Linear CCA/PLS: ---------------- - -A variety of linear CCA and PLS methods implemented using alternating minimization methods for non-convex optimisation -based on the power method or alternating least squares. - -GCCA (Generalized MAXVAR CCA): -------------------------------- - -The generalized eigenvalue problem form of generalized MAXVAR CCA. Maximises the squared correlation between each view projection and -a shared auxiliary vector of unit length. - -https://academic.oup.com/biomet/article-abstract/58/3/433/233349?redirectedFrom=fulltext - -MCCA (Multiset SUMCOR CCA): ---------------------------- - -The generalized eigenvalue problem form of multiset SUMCOR CCA. Maximises the pairwise sum of correlations between view -projections. - -SCCA (Sparse CCA - Mai): ------------------------- - -A solution to the sparse CCA problem based on iterative rescaled lasso regression problems to ensure projections are unit length. - -https://onlinelibrary.wiley.com/doi/abs/10.1111/biom.13043?casa_token=pw8OSPmNkzEAAAAA:CcrMA_8g_2po011hQsGQXfiYyvtpBlSS6LJm-z_zANOg6t5YhpFZ-2YJNeCbJdHmT7GXIFZUU7gQl78 - -PMD (Sparse PLS/PMD/Penalized Matrix Decomposition - Witten): --------------------------------------------------------------- - -A solution to a sparse CCA problem based on penalized matrix decomposition. The relaxation and assumptions made make this method -more similar to an l1-regularized PLS - -https://academic.oup.com/biostatistics/article/10/3/515/293026 - -PCCA (Penalized CCA - elastic net - Waaijenborg): -------------------------------------------------- - -A solution to the sparse CCA problem based on iterative rescaled elastic regression problems to ensure projections are unit length. - -https://pubmed.ncbi.nlm.nih.gov/19689958/ - -SCCA_ADMM (Sparse canonical correlation analysis-Suo): ------------------------------------------------------- - -A solution to the sparse CCA problem based on iterative rescaled lasso regression problems solved using ADMM. - -https://arxiv.org/abs/1705.10865 - -Kernel CCA ----------- - -CCA solved using the kernel method. Adding regularisation in the linear case can be shown to be equivalent to regularised CCA. 
- -DCCA (Deep CCA): ----------------- - -Using either Andrew's original Tracenorm Objective or Wang's alternating least squares solution - -https://ttic.uchicago.edu/-klivescu/papers/andrew_icml2013.pdf -https://arxiv.org/pdf/1510.02054v1.pdf - - -DGCCA (Deep Generalized CCA): ------------------------------ - -An alternative objective based on the linear GCCA solution. Can be extended to more than 2 views - -https://www.aclweb.org/anthology/W19-4301.pdf - -DMCCA (Deep Multiset CCA): --------------------------- - -An alternative objective based on the linear MCCA solution. Can be extended to more than 2 views - -https://arxiv.org/abs/1904.01775 - -DCCAE (Deep Canonically Correlated Autoencoders): -------------------------------------------------- - -http://proceedings.mlr.press/v37/wangb15.pdf - -DVCCA/DVCCA Private (Deep variational CCA): -------------------------------------------- - -https://arxiv.org/pdf/1610.03454.pdf \ No newline at end of file From 14f3f434f71cbf4fd71ea7535976c76ddb96380b Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 12:46:01 +0000 Subject: [PATCH 20/45] Small doc changes to section levels --- docs/source/api/iterative.rst | 65 ------------------------- docs/source/api/models.rst | 68 +++++++++++++++++++++++++-- docs/source/documentation/install.rst | 1 + 3 files changed, 66 insertions(+), 68 deletions(-) delete mode 100644 docs/source/api/iterative.rst diff --git a/docs/source/api/iterative.rst b/docs/source/api/iterative.rst deleted file mode 100644 index 1cb2557c..00000000 --- a/docs/source/api/iterative.rst +++ /dev/null @@ -1,65 +0,0 @@ -Normal CCA and PLS by alternating least squares -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Quicker and more memory efficient for very large data - - -CCA by Alternating Least Squares -"""""""""""""""""""""""""""""""""""" -.. autoclass:: cca_zoo.models.CCA_ALS - :inherited^members: - :exclude^members: get_params, set_params - -PLS by Alternating Least Squares -"""""""""""""""""""""""""""""""""""" -.. autoclass:: cca_zoo.models.PLS_ALS - :inherited^members: - :exclude^members: get_params, set_params - - -Sparsity Inducing Models -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Penalized Matrix Decomposition (Sparse PLS) -""""""""""""""""""""""""""""""""""""""""""""""" -.. autoclass:: cca_zoo.models.PMD - :inherited^members: - :exclude^members: get_params, set_params - -Sparse CCA by iterative lasso regression -""""""""""""""""""""""""""""""""""""""""""""""" -.. autoclass:: cca_zoo.models.SCCA - :inherited^members: - :exclude^members: get_params, set_params - -Elastic CCA by MAXVAR -""""""""""""""""""""""""""""""""""""""""""""""" -.. autoclass:: cca_zoo.models.ElasticCCA - :inherited^members: - :exclude^members: get_params, set_params - -Span CCA -""""""""""""""""""""""""""""""""""""""""""""""" -.. autoclass:: cca_zoo.models.SpanCCA - :inherited^members: - :exclude^members: get_params, set_params - -Parkhomenko (penalized) CCA -""""""""""""""""""""""""""""""""""""""""" -.. autoclass:: cca_zoo.models.ParkhomenkoCCA - :inherited^members: - :exclude^members: get_params, set_params - -Sparse CCA by ADMM -""""""""""""""""""""""""""""""""""""""""""" -.. autoclass:: cca_zoo.models.SCCA_ADMM - :inherited^members: - :exclude^members: get_params, set_params - -Miscellaneous -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Sparse Weighted CCA -""""""""""""""""""""""""""""""""""""""""""" -.. 
autoclass:: cca_zoo.models.SWCCA - :inherited^members: - :exclude^members: get_params, set_params \ No newline at end of file diff --git a/docs/source/api/models.rst b/docs/source/api/models.rst index fb4f64a9..f224cfc9 100644 --- a/docs/source/api/models.rst +++ b/docs/source/api/models.rst @@ -71,9 +71,71 @@ Kernel Tensor Canonical Correlation Analysis More Complex Regularisation using Iterative Models ----------------------------------------------------- -.. toctree:: - :maxdepth: 6 - iterative.rst +Normal CCA and PLS by alternating least squares +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Quicker and more memory efficient for very large data + + +CCA by Alternating Least Squares +"""""""""""""""""""""""""""""""""""" +.. autoclass:: cca_zoo.models.CCA_ALS + :inherited^members: + :exclude^members: get_params, set_params + +PLS by Alternating Least Squares +"""""""""""""""""""""""""""""""""""" +.. autoclass:: cca_zoo.models.PLS_ALS + :inherited^members: + :exclude^members: get_params, set_params + + +Sparsity Inducing Models +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Penalized Matrix Decomposition (Sparse PLS) +""""""""""""""""""""""""""""""""""""""""""""""" +.. autoclass:: cca_zoo.models.PMD + :inherited^members: + :exclude^members: get_params, set_params + +Sparse CCA by iterative lasso regression +""""""""""""""""""""""""""""""""""""""""""""""" +.. autoclass:: cca_zoo.models.SCCA + :inherited^members: + :exclude^members: get_params, set_params + +Elastic CCA by MAXVAR +""""""""""""""""""""""""""""""""""""""""""""""" +.. autoclass:: cca_zoo.models.ElasticCCA + :inherited^members: + :exclude^members: get_params, set_params + +Span CCA +""""""""""""""""""""""""""""""""""""""""""""""" +.. autoclass:: cca_zoo.models.SpanCCA + :inherited^members: + :exclude^members: get_params, set_params + +Parkhomenko (penalized) CCA +""""""""""""""""""""""""""""""""""""""""" +.. autoclass:: cca_zoo.models.ParkhomenkoCCA + :inherited^members: + :exclude^members: get_params, set_params + +Sparse CCA by ADMM +""""""""""""""""""""""""""""""""""""""""""" +.. autoclass:: cca_zoo.models.SCCA_ADMM + :inherited^members: + :exclude^members: get_params, set_params + +Miscellaneous +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Sparse Weighted CCA +""""""""""""""""""""""""""""""""""""""""""" +.. autoclass:: cca_zoo.models.SWCCA + :inherited^members: + :exclude^members: get_params, set_params Base Class -------------------------------- diff --git a/docs/source/documentation/install.rst b/docs/source/documentation/install.rst index 4835b408..90adc16c 100644 --- a/docs/source/documentation/install.rst +++ b/docs/source/documentation/install.rst @@ -25,6 +25,7 @@ with windows installation, we do not install them by default. To access these, * [deep]: ``Deep Learning Based Models`` * [probabilistic]: ``Probabilistic Models`` +* [all]: ``Include both Probabilistic and Deep Learning Based Models`` If you wish to use these functions, you must install their required dependencies. 
These are listed in the package requirements folder with corresponding keyword names for manual installation or can be installed from PyPI by simply calling:: From 9b7ad2dbc1323bc3f538d2896944c28dd0747898 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 12:59:31 +0000 Subject: [PATCH 21/45] Small doc changes to section levels --- docs/source/api/models.rst | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/source/api/models.rst b/docs/source/api/models.rst index f224cfc9..78240b66 100644 --- a/docs/source/api/models.rst +++ b/docs/source/api/models.rst @@ -79,14 +79,14 @@ Quicker and more memory efficient for very large data CCA by Alternating Least Squares """""""""""""""""""""""""""""""""""" .. autoclass:: cca_zoo.models.CCA_ALS - :inherited^members: - :exclude^members: get_params, set_params + :inherited-members: + :exclude-members: get_params, set_params PLS by Alternating Least Squares """""""""""""""""""""""""""""""""""" .. autoclass:: cca_zoo.models.PLS_ALS - :inherited^members: - :exclude^members: get_params, set_params + :inherited-members: + :exclude-members: get_params, set_params Sparsity Inducing Models @@ -95,38 +95,38 @@ Sparsity Inducing Models Penalized Matrix Decomposition (Sparse PLS) """"""""""""""""""""""""""""""""""""""""""""""" .. autoclass:: cca_zoo.models.PMD - :inherited^members: - :exclude^members: get_params, set_params + :inherited-members: + :exclude-members: get_params, set_params Sparse CCA by iterative lasso regression """"""""""""""""""""""""""""""""""""""""""""""" .. autoclass:: cca_zoo.models.SCCA - :inherited^members: - :exclude^members: get_params, set_params + :inherited-members: + :exclude-members: get_params, set_params Elastic CCA by MAXVAR """"""""""""""""""""""""""""""""""""""""""""""" .. autoclass:: cca_zoo.models.ElasticCCA - :inherited^members: - :exclude^members: get_params, set_params + :inherited-members: + :exclude-members: get_params, set_params Span CCA """"""""""""""""""""""""""""""""""""""""""""""" .. autoclass:: cca_zoo.models.SpanCCA - :inherited^members: - :exclude^members: get_params, set_params + :inherited-members: + :exclude-members: get_params, set_params Parkhomenko (penalized) CCA """"""""""""""""""""""""""""""""""""""""" .. autoclass:: cca_zoo.models.ParkhomenkoCCA - :inherited^members: - :exclude^members: get_params, set_params + :inherited-members: + :exclude-members: get_params, set_params Sparse CCA by ADMM """"""""""""""""""""""""""""""""""""""""""" .. autoclass:: cca_zoo.models.SCCA_ADMM - :inherited^members: - :exclude^members: get_params, set_params + :inherited-members: + :exclude-members: get_params, set_params Miscellaneous ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -134,8 +134,8 @@ Miscellaneous Sparse Weighted CCA """"""""""""""""""""""""""""""""""""""""""" .. 
autoclass:: cca_zoo.models.SWCCA - :inherited^members: - :exclude^members: get_params, set_params + :inherited-members: + :exclude-members: get_params, set_params Base Class -------------------------------- From 75101edb428e0c2fa8bb16a3b67f6f8b9b8ca166 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 13:33:06 +0000 Subject: [PATCH 22/45] Small doc changes updating examples --- cca_zoo/deepmodels/trainers.py | 82 ++--------------------- cca_zoo/models/innerloop.py | 3 - examples/plot_dcca_custom.py | 2 +- examples/plot_hyperparameter_selection.py | 63 +++++++++++++++++ examples/plot_many_views.py | 60 +++++++++++++++++ examples/plot_ridge_reg.py | 6 -- 6 files changed, 131 insertions(+), 85 deletions(-) delete mode 100644 examples/plot_ridge_reg.py diff --git a/cca_zoo/deepmodels/trainers.py b/cca_zoo/deepmodels/trainers.py index 6e36f71e..b334393c 100644 --- a/cca_zoo/deepmodels/trainers.py +++ b/cca_zoo/deepmodels/trainers.py @@ -1,6 +1,5 @@ import itertools -import sys -from typing import Optional, Union +from typing import Optional import numpy as np import torch @@ -12,20 +11,10 @@ class CCALightning(LightningModule): def __init__( - self, - model: _DCCA_base, - optimizer: Union[torch.optim.Optimizer, str] = "Adam", - learning_rate: float = 1e-3, - weight_decay: float = 0.1, - lr_scheduler: torch.optim.lr_scheduler._LRScheduler = None, - StepLR_step_size: float = None, - StepLR_gamma: float = None, - lr_factor: float = None, - lr_patience: float = None, - OneCycleLR_max_lr: float = None, - OneCycleLR_epochs: float = None, - train_trajectories: float = None, - T: float = None, + self, + model: _DCCA_base, + optimizer: torch.optim.Optimizer = None, + scheduler: torch.optim.lr_scheduler = None, ): """ @@ -33,15 +22,6 @@ def __init__( :param optimizer: a pytorch optimizer with parameters from model or a string like 'Adam' to use Adam optimizer with default parameters or those specified by the user :param learning_rate: learning rate used when optimizer is instantiated with a string :param weight_decay: weight decay used when optimizer is instantiated with a string - :param lr_scheduler: a pytorch learning rate scheduler or a string like "StepLR" or None - :param StepLR_step_size: step size used by "StepLR" - :param StepLR_gamma: gamma used by "StepLR" - :param lr_factor: factor used by "ReduceLROnPlateau" - :param lr_patience: patience used by "ReduceLROnPlateau" - :param OneCycleLR_max_lr: max lr used by "OneCycleLR" - :param OneCycleLR_epochs: epochs used by "OneCycleLR" - :param train_trajectories: train trajectories used by "OneCycleLR" - :param T: T used by "OneCycleLR" """ super().__init__() self.save_hyperparameters() @@ -58,65 +38,17 @@ def loss(self, *args, **kwargs): def configure_optimizers(self): if isinstance(self.hparams.optimizer, torch.optim.Optimizer): optimizer = self.hparams.optimizer - elif self.hparams.optimizer == "Adam": + else: optimizer = torch.optim.Adam( self.parameters(), lr=self.hparams.learning_rate, weight_decay=self.hparams.weight_decay, ) - elif self.hparams.optimizer == "SGD": - # Left out the momentum options for now - optimizer = torch.optim.SGD( - self.parameters(), - lr=self.hparams.learning_rate, - weight_decay=self.hparams.weight_decay, - ) - elif self.hparams.optimizer == "LBFGS": - optimizer = torch.optim.LBFGS( - self.parameters(), - # or can have self.hparams.learning_rate with warning if too low. - lr=1, - tolerance_grad=1e-5, # can add to parameters if useful. - tolerance_change=1e-9, # can add to parameters if useful. 
- ) - else: - print("Invalid optimizer. See --help") - sys.exit() if self.hparams.lr_scheduler is None: return optimizer - elif isinstance( - self.hparams.lr_scheduler, torch.optim.lr_scheduler._LRScheduler - ): - scheduler = self.hparams.lr_scheduler - elif self.hparams.lr_scheduler == "StepLR": - step_size = self.hparams.StepLR_step_size - gamma = self.hparams.StepLR_gamma - scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size, gamma) - elif self.hparams.lr_scheduler == "ReduceLROnPlateau": - factor = self.hparams.lr_factor - patience = self.hparams.lr_patience - scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( - optimizer, mode="min", factor=factor, patience=patience - ) - return { - "optimizer": optimizer, - "lr_scheduler": scheduler, - "monitor": self.hparams.LRScheduler_metric, - } - elif self.hparams.lr_scheduler == "OneCycleLR": - max_lr = self.hparams.OneCycleLR_max_lr - epochs = self.hparams.OneCycleLR_epochs - steps_per_epoch = self.hparams.train_trajectories * (self.hparams.T + 1) - scheduler = torch.optim.lr_scheduler.OneCycleLR( - optimizer, - max_lr=max_lr, - epochs=epochs, - steps_per_epoch=steps_per_epoch, - ) else: - print("Invalid scheduler configuration. See --help") - raise + scheduler = self.hparams.lr_scheduler return [optimizer], [scheduler] def training_step(self, batch, batch_idx): diff --git a/cca_zoo/models/innerloop.py b/cca_zoo/models/innerloop.py index fe40eed2..13ca70ed 100644 --- a/cca_zoo/models/innerloop.py +++ b/cca_zoo/models/innerloop.py @@ -73,9 +73,6 @@ def _initialize(self): def _fit(self, *views: np.ndarray): self.views = views self.n = views[0].shape[0] - if len(self.views) > 2: - self.generalized = True - warnings.warn("For more than 2 views require generalized=True") # Check that the parameters that have been passed are valid for these views given #views and #features self._check_params() diff --git a/examples/plot_dcca_custom.py b/examples/plot_dcca_custom.py index 694943b7..4f89090a 100644 --- a/examples/plot_dcca_custom.py +++ b/examples/plot_dcca_custom.py @@ -32,8 +32,8 @@ # Deep CCA dcca = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2]) -dcca = CCALightning(dcca) optimizer = optim.Adam(dcca.parameters(), lr=1e-3) scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, 1) +dcca = CCALightning(dcca, optimizer=optimizer, scheduler=scheduler) trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False) trainer.fit(dcca, train_loader, val_loader) diff --git a/examples/plot_hyperparameter_selection.py b/examples/plot_hyperparameter_selection.py index 0d687e88..1bfb4714 100644 --- a/examples/plot_hyperparameter_selection.py +++ b/examples/plot_hyperparameter_selection.py @@ -4,3 +4,66 @@ This script will show how to perform hyperparameter selection """ + +# %% +import numpy as np +import pandas as pd +from sklearn.utils.fixes import loguniform + +from cca_zoo.data import generate_covariance_data +from cca_zoo.model_selection import GridSearchCV, RandomizedSearchCV +from cca_zoo.models import KCCA + +# %% +np.random.seed(42) +n = 200 +p = 100 +q = 100 +latent_dims = 1 +cv = 3 + +(X, Y), (tx, ty) = generate_covariance_data( + n, view_features=[p, q], latent_dims=latent_dims, correlation=[0.9] +) + +""" +Grid Search +-------------------- + +Hyperparameter selection works in a very similar way to in scikit-learn where the main difference is in how we enter the parameter grid. +We form a parameter grid with the search space for each view for each parameter. 
+This search space must be entered as a list but can be any of +- a single value (as in "kernel") where this value will be used for each view +- a list for each view +- a mixture of a single value for one view and a distribution or list for the other +""" + +# %% +# Linear Grid Search +param_grid = {"kernel": ["poly"], "c": [[1e-1], [1e-1, 2e-1]], "degree": [[2], [2, 3]]} +kernel_reg = ( + GridSearchCV( + KCCA(latent_dims=latent_dims), param_grid=param_grid, cv=cv, verbose=True + ) + .fit([X, Y]) +) + +""" +Randomized Search +-------------------- + +With Randomized Search we can additionally use distributions from scikit-learn to define the parameter search space +""" + +# %% +# Linear Randomized Search +param_grid = {"kernel": ["poly"], "c": [loguniform(1e-1, 2e-1), [1e-1]], "degree": [[2], [2, 3]]} +kernel_reg = ( + RandomizedSearchCV( + KCCA(latent_dims=latent_dims), param_distributions=param_grid, cv=cv, verbose=True + ) + .fit([X, Y]) +) + +# %% +print(pd.DataFrame(kernel_reg.cv_results_)) diff --git a/examples/plot_many_views.py b/examples/plot_many_views.py index 098e52c1..4eb50fe0 100644 --- a/examples/plot_many_views.py +++ b/examples/plot_many_views.py @@ -4,3 +4,63 @@ This will compare MCCA, GCCA, TCCA for linear models with more than 2 views """ +# %% +import numpy as np + +from cca_zoo.data import generate_covariance_data +from cca_zoo.models import MCCA, GCCA, TCCA, KCCA, KGCCA, KTCCA, PMD + +# %% +np.random.seed(42) +n = 200 +p = 10 +q = 10 +r = 10 +latent_dims = 1 +cv = 3 + +(X, Y, Z), (tx, ty, tz) = generate_covariance_data( + n, view_features=[p, q, r], latent_dims=latent_dims, correlation=[0.9] +) + +""" +Eigendecomposition-Based Methods +--------------------------------- +""" + +# %% +mcca = MCCA(latent_dims=latent_dims).fit((X, Y, X)).score((X, Y, Z)) + +# %% +gcca = GCCA(latent_dims=latent_dims).fit((X, Y, X)).score((X, Y, Z)) + +""" +We can also use kernel versions of these methods +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +""" + +# %% +kcca = KCCA(latent_dims=latent_dims).fit((X, Y, X)).score((X, Y, Z)) + +# %% +kgcca = KGCCA(latent_dims=latent_dims).fit((X, Y, X)).score((X, Y, Z)) + +""" +Higher order correlation methods +--------------------------------- +""" + +# %% +# Tensor CCA finds higher order correlations so scores are not comparable (but TCCA is equivalent for 2 views) +tcca = TCCA(latent_dims=latent_dims).fit((X, Y, X)).score((X, Y, Z)) + +# %% +ktcca = KTCCA(latent_dims=latent_dims).fit((X, Y, X)).score((X, Y, Z)) + +""" +Iterative Methods +--------------------- + +Most of the iterative methods can also use multiple views e.g. 
+""" +pmd = PMD(latent_dims=latent_dims, c=1).fit((X, Y, X)).score((X, Y, Z)) diff --git a/examples/plot_ridge_reg.py b/examples/plot_ridge_reg.py deleted file mode 100644 index ad63399b..00000000 --- a/examples/plot_ridge_reg.py +++ /dev/null @@ -1,6 +0,0 @@ -""" -Ridge Regularised CCA: From CCA to PLS -=========================== - -This script will show how CCA and PLS form opposite ends of a ridge regularisation spectrum -""" From 3de1634433b1e6cb0bc6c8239398886e92a3bbc5 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 14:21:58 +0000 Subject: [PATCH 23/45] Small doc changes updating examples --- cca_zoo/deepmodels/trainers.py | 6 ++---- examples/plot_many_views.py | 8 ++++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/cca_zoo/deepmodels/trainers.py b/cca_zoo/deepmodels/trainers.py index b334393c..6edd8958 100644 --- a/cca_zoo/deepmodels/trainers.py +++ b/cca_zoo/deepmodels/trainers.py @@ -19,9 +19,8 @@ def __init__( """ :param model: a model instance from deepmodels - :param optimizer: a pytorch optimizer with parameters from model or a string like 'Adam' to use Adam optimizer with default parameters or those specified by the user - :param learning_rate: learning rate used when optimizer is instantiated with a string - :param weight_decay: weight decay used when optimizer is instantiated with a string + :param optimizer: a pytorch optimizer with parameters from model + :param scheduler: a pytorch scheduler """ super().__init__() self.save_hyperparameters() @@ -34,7 +33,6 @@ def forward(self, *args): def loss(self, *args, **kwargs): return self.model.loss(*args, **kwargs) - # Configuration. Add more for learning schedulers, etc.? def configure_optimizers(self): if isinstance(self.hparams.optimizer, torch.optim.Optimizer): optimizer = self.hparams.optimizer diff --git a/examples/plot_many_views.py b/examples/plot_many_views.py index 4eb50fe0..88a28400 100644 --- a/examples/plot_many_views.py +++ b/examples/plot_many_views.py @@ -12,10 +12,10 @@ # %% np.random.seed(42) -n = 200 -p = 10 -q = 10 -r = 10 +n = 30 +p = 3 +q = 3 +r = 3 latent_dims = 1 cv = 3 From dfe46eef38ecd8e66b6755aa3c1f8576ee0dcf47 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 14:25:23 +0000 Subject: [PATCH 24/45] Small doc changes updating examples --- docs/source/documentation/getting_started.rst | 6 ++--- docs/source/documentation/user_guide.rst | 26 +++---------------- 2 files changed, 5 insertions(+), 27 deletions(-) diff --git a/docs/source/documentation/getting_started.rst b/docs/source/documentation/getting_started.rst index ba05ad6e..4b4ecac0 100644 --- a/docs/source/documentation/getting_started.rst +++ b/docs/source/documentation/getting_started.rst @@ -12,12 +12,10 @@ Look how easy it is to use: from cca_zoo.models import CCA from cca_zoo.data import generate_covariance_data # %% - (train_view_1,train_view_2),(true_weights_1,true_weights_2)=generate_covariance_data(n=200,view_features=[10,10],latent_dims=1,correlation=1) + n_samples=100 + (train_view_1,train_view_2),(true_weights_1,true_weights_2)=generate_covariance_data(n=n_samples,view_features=[10,10],latent_dims=1,correlation=1) linear_cca = CCA(latent_dims=latent_dims, max_iter=max_iter) linear_cca.fit((train_view_1, train_view_2)) -In addition to the code snippets in the user guide section, we provide a number of tutorial notebooks hosted on google -colab at https://github.com/jameschapman19/cca_zoo/tree/main/tutorial_notebooks - diff --git a/docs/source/documentation/user_guide.rst 
b/docs/source/documentation/user_guide.rst index 79cc0f30..bac37bdc 100644 --- a/docs/source/documentation/user_guide.rst +++ b/docs/source/documentation/user_guide.rst @@ -44,7 +44,7 @@ to use a data driven approach. Model Transforms ----------------- -One models are fit we can transform the data to latent projections for each view +Once models are fit we can transform the data to latent projections for each view .. sourcecode:: python @@ -97,25 +97,5 @@ We build our deep cca model using these encoders as inputs: from cca_zoo.deepmodels import DCCA dcca_model = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2]) -This produces a PyTorch.nn.Module object which can be updated in a customised training loop. As a quick start, we also -provide a DeepWrapper class which wraps the deep cca model and its training loop so that it shares the fit(), transform() -and score() methods of the other models in the package. - -.. sourcecode:: python - - from cca_zoo.deepmodels import DeepWrapper - dcca_model = DeepWrapper(dcca_model) - #datasets can be pytorch datasets which output ((view_1,view_2),label) or 2 or more numpy arrays - dcca_model.fit(train_dataset, val_dataset=val_dataset, epochs=epochs) - -We can now use: - -.. sourcecode:: python - - dcca_model.score(train_dataset) - -And: - -.. sourcecode:: python - - projection_1,projection_2=dcca_model.transform(train_dataset) \ No newline at end of file +This produces a PyTorch.nn.Module object which can be updated in a customised training loop. We also provide a LightningModule +class from pytorch-lightning which can be used to train any of these models. \ No newline at end of file From b4f1d2856d8a4ba6ea7a433025e2a8ec3c83cba5 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 14:26:51 +0000 Subject: [PATCH 25/45] Small doc changes updating examples --- cca_zoo/deepmodels/trainers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cca_zoo/deepmodels/trainers.py b/cca_zoo/deepmodels/trainers.py index 6edd8958..3a025789 100644 --- a/cca_zoo/deepmodels/trainers.py +++ b/cca_zoo/deepmodels/trainers.py @@ -39,7 +39,7 @@ def configure_optimizers(self): else: optimizer = torch.optim.Adam( self.parameters(), - lr=self.hparams.learning_rate, + lr=1e-3, weight_decay=self.hparams.weight_decay, ) From d316f94ef18734c83bbc96035413d2b48e8ab6f3 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 14:27:45 +0000 Subject: [PATCH 26/45] Small doc changes updating examples --- cca_zoo/deepmodels/trainers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cca_zoo/deepmodels/trainers.py b/cca_zoo/deepmodels/trainers.py index 3a025789..3c8fedec 100644 --- a/cca_zoo/deepmodels/trainers.py +++ b/cca_zoo/deepmodels/trainers.py @@ -14,7 +14,7 @@ def __init__( self, model: _DCCA_base, optimizer: torch.optim.Optimizer = None, - scheduler: torch.optim.lr_scheduler = None, + lr_scheduler: torch.optim.lr_scheduler = None, ): """ From 6db3cb4b800f6f3d3db7086f2e8d6e4aad1ca100 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 14:30:13 +0000 Subject: [PATCH 27/45] Small doc changes updating examples --- cca_zoo/deepmodels/trainers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cca_zoo/deepmodels/trainers.py b/cca_zoo/deepmodels/trainers.py index 3c8fedec..d195fa89 100644 --- a/cca_zoo/deepmodels/trainers.py +++ b/cca_zoo/deepmodels/trainers.py @@ -40,7 +40,6 @@ def configure_optimizers(self): optimizer = torch.optim.Adam( self.parameters(), lr=1e-3, - 
weight_decay=self.hparams.weight_decay, ) if self.hparams.lr_scheduler is None: From 8de2503bff7acbec459dca49b09ff2a3234b1972 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 16:06:13 +0000 Subject: [PATCH 28/45] Small doc changes updating examples --- cca_zoo/deepmodels/trainers.py | 2 +- cca_zoo/test/test_deepmodels.py | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/cca_zoo/deepmodels/trainers.py b/cca_zoo/deepmodels/trainers.py index d195fa89..25dbc906 100644 --- a/cca_zoo/deepmodels/trainers.py +++ b/cca_zoo/deepmodels/trainers.py @@ -20,7 +20,7 @@ def __init__( :param model: a model instance from deepmodels :param optimizer: a pytorch optimizer with parameters from model - :param scheduler: a pytorch scheduler + :param lr_scheduler: a pytorch scheduler """ super().__init__() self.save_hyperparameters() diff --git a/cca_zoo/test/test_deepmodels.py b/cca_zoo/test/test_deepmodels.py index 01cd4124..1f01d799 100644 --- a/cca_zoo/test/test_deepmodels.py +++ b/cca_zoo/test/test_deepmodels.py @@ -250,14 +250,15 @@ def test_linear(): encoder_1 = architectures.LinearEncoder(latent_dims=1, feature_size=10) encoder_2 = architectures.LinearEncoder(latent_dims=1, feature_size=12) dcca = DCCA(latent_dims=1, encoders=[encoder_1, encoder_2]) - dcca = CCALightning(dcca, learning_rate=1e-1) + optimizer = optim.Adam(dcca.parameters(), lr=1e-1) + dcca = CCALightning(dcca, optimizer=optimizer) trainer = pl.Trainer(max_epochs=50, enable_checkpointing=False) trainer.fit(dcca, loader) cca = CCA().fit((X, Y)) # check linear encoder with SGD matches vanilla linear CCA assert ( - np.testing.assert_array_almost_equal( - cca.score((X, Y)), trainer.model.score(loader), decimal=2 - ) - is None + np.testing.assert_array_almost_equal( + cca.score((X, Y)), trainer.model.score(loader), decimal=2 + ) + is None ) From 322c78ee8e8f8b0c7ce8f87c3689129f5efca41f Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 16:15:04 +0000 Subject: [PATCH 29/45] Removing brainnet to avoid feature creep --- cca_zoo/deepmodels/architectures.py | 97 +---------------------------- cca_zoo/test/test_deepmodels.py | 68 ++++++++++---------- 2 files changed, 35 insertions(+), 130 deletions(-) diff --git a/cca_zoo/deepmodels/architectures.py b/cca_zoo/deepmodels/architectures.py index 31bf2f1f..0a24044a 100644 --- a/cca_zoo/deepmodels/architectures.py +++ b/cca_zoo/deepmodels/architectures.py @@ -1,9 +1,8 @@ from abc import abstractmethod from math import sqrt -from typing import Iterable, Tuple +from typing import Iterable import torch -from torch.nn import functional as F class BaseEncoder(torch.nn.Module): @@ -271,92 +270,6 @@ def forward(self, x): return x -# https://github.com/nicofarr/brainnetcnnVis_pytorch/blob/master/BrainNetCnnGoldMSI.py -class E2EBlock(torch.nn.Module): - def __init__(self, in_planes, planes, size, bias=False): - super(E2EBlock, self).__init__() - self.d = size - self.cnn1 = torch.nn.Conv2d(in_planes, planes, (1, self.d), bias=bias) - self.cnn2 = torch.nn.Conv2d(in_planes, planes, (self.d, 1), bias=bias) - - def forward(self, x): - a = self.cnn1(x) - b = self.cnn2(x) - return torch.cat([a] * self.d, 3) + torch.cat([b] * self.d, 2) - - -class E2EBlockReverse(torch.nn.Module): - def __init__(self, in_planes, planes, size, bias=False): - super(E2EBlockReverse, self).__init__() - - self.d = size - self.cnn1 = torch.nn.ConvTranspose2d(in_planes, planes, (1, self.d), bias=bias) - self.cnn2 = torch.nn.ConvTranspose2d(in_planes, planes, (self.d, 1), bias=bias) - 
- def forward(self, x): - a = self.cnn1(x) - b = self.cnn2(x) - return torch.cat([a] * self.d, 3) + torch.cat([b] * self.d, 2) - - -# BrainNetCNN Network for fitting Gold-MSI on LSD dataset -class BrainNetEncoder(BaseEncoder): - def __init__( - self, - latent_dims: int, - variational: bool = False, - feature_size: Tuple[int] = (200, ...), - ): - super(BrainNetEncoder, self).__init__(latent_dims, variational=variational) - _check_feature_size(feature_size) - self.d = feature_size[0] - self.e2econv1 = E2EBlock(1, 32, self.d, bias=True) - self.e2econv2 = E2EBlock(32, 64, self.d, bias=True) - self.E2N = torch.nn.Conv2d(64, 1, (1, self.d)) - self.N2G = torch.nn.Conv2d(1, 256, (self.d, 1)) - self.dense1 = torch.nn.Linear(256, 128) - self.dense2 = torch.nn.Linear(128, 30) - self.dense3 = torch.nn.Linear(30, latent_dims) - - def forward(self, x): - out = F.leaky_relu(self.e2econv1(x), negative_slope=0.33) # B,32,200,200 - out = F.leaky_relu(self.e2econv2(out), negative_slope=0.33) # B,64,200,200 - out = F.leaky_relu(self.E2N(out), negative_slope=0.33) # B,1,200,1 - out = F.dropout( - F.leaky_relu(self.N2G(out), negative_slope=0.33), p=0.5 - ) # B,256,1,1 - out = out.view(out.shape[0], -1) # B,256 - out = F.dropout(F.leaky_relu(self.dense1(out), negative_slope=0.33), p=0.5) - out = F.dropout(F.leaky_relu(self.dense2(out), negative_slope=0.33), p=0.5) - out = F.leaky_relu(self.dense3(out), negative_slope=0.33) - return out - - -class BrainNetDecoder(BaseDecoder): - def __init__(self, latent_dims: int, feature_size: Tuple[int] = (200, ...)): - super(BrainNetDecoder, self).__init__(latent_dims) - _check_feature_size(feature_size) - self.d = feature_size[0] - self.e2econv1 = E2EBlock(32, 1, self.d, bias=True) - self.e2econv2 = E2EBlock(64, 32, self.d, bias=True) - self.E2N = torch.nn.ConvTranspose2d(1, 64, (1, self.d)) - self.N2G = torch.nn.ConvTranspose2d(256, 1, (self.d, 1)) - self.dense1 = torch.nn.Linear(128, 256) - self.dense2 = torch.nn.Linear(30, 128) - self.dense3 = torch.nn.Linear(latent_dims, 30) - - def forward(self, x): - out = F.dropout(F.leaky_relu(self.dense3(x), negative_slope=0.33), p=0.5) - out = F.dropout(F.leaky_relu(self.dense2(out), negative_slope=0.33), p=0.5) - out = F.leaky_relu(self.dense1(out), negative_slope=0.33) - out = out.view(out.size(0), out.size(1), 1, 1) - out = F.dropout(F.leaky_relu(self.N2G(out), negative_slope=0.33), p=0.5) - out = F.leaky_relu(self.E2N(out), negative_slope=0.33) - out = F.leaky_relu(self.e2econv2(out), negative_slope=0.33) - out = F.leaky_relu(self.e2econv1(out), negative_slope=0.33) - return out - - class LinearEncoder(BaseEncoder): def __init__(self, latent_dims: int, feature_size: int, variational: bool = False): super(LinearEncoder, self).__init__(latent_dims, variational=variational) @@ -386,11 +299,3 @@ def __init__(self, latent_dims: int, feature_size: int): def forward(self, x): out = self.linear(x) return out - - -def _check_feature_size(feature_size): - if feature_size[0] != feature_size[1]: - raise ValueError( - "BrainNetCNN requires a pair of feature_size of the" - f"same value. feature_size={feature_size}." 
- ) diff --git a/cca_zoo/test/test_deepmodels.py b/cca_zoo/test/test_deepmodels.py index 1f01d799..b301463e 100644 --- a/cca_zoo/test/test_deepmodels.py +++ b/cca_zoo/test/test_deepmodels.py @@ -25,8 +25,8 @@ X = rng.rand(200, 10) Y = rng.rand(200, 12) Z = rng.rand(200, 14) -X_conv = rng.rand(100, 1, 16, 16) -Y_conv = rng.rand(100, 1, 16, 16) +X_conv = rng.rand(200, 1, 16, 16) +Y_conv = rng.rand(200, 1, 16, 16) dataset = data.CCA_Dataset([X, Y, Z]) train_dataset, val_dataset = process_data(dataset, val_split=0.2) train_dataset_numpy, val_dataset_numpy = process_data((X, Y, Z), val_split=0.2) @@ -53,10 +53,10 @@ def test_DCCA_methods(): ) trainer.fit(dcca_noi, train_loader) assert ( - np.testing.assert_array_less( - cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum() - ) - is None + np.testing.assert_array_less( + cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum() + ) + is None ) # Soft Decorrelation (stochastic Decorrelation Loss) encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10) @@ -67,10 +67,10 @@ def test_DCCA_methods(): trainer = pl.Trainer(max_epochs=epochs, log_every_n_steps=10) trainer.fit(sdl, train_loader) assert ( - np.testing.assert_array_less( - cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum() - ) - is None + np.testing.assert_array_less( + cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum() + ) + is None ) # DCCA encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10) @@ -87,10 +87,10 @@ def test_DCCA_methods(): ) trainer.fit(dcca, train_loader) assert ( - np.testing.assert_array_less( - cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum() - ) - is None + np.testing.assert_array_less( + cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum() + ) + is None ) # DGCCA encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10) @@ -107,10 +107,10 @@ def test_DCCA_methods(): ) trainer.fit(dgcca, train_loader) assert ( - np.testing.assert_array_less( - cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum() - ) - is None + np.testing.assert_array_less( + cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum() + ) + is None ) # DMCCA encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10) @@ -127,10 +127,10 @@ def test_DCCA_methods(): ) trainer.fit(dmcca, train_loader) assert ( - np.testing.assert_array_less( - cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum() - ) - is None + np.testing.assert_array_less( + cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum() + ) + is None ) # Barlow Twins encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10) @@ -146,18 +146,18 @@ def test_DCCA_methods(): ) trainer.fit(barlowtwins, train_loader) assert ( - np.testing.assert_array_less( - cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum() - ) - is None + np.testing.assert_array_less( + cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum() + ) + is None ) def test_DTCCA_methods(): latent_dims = 2 epochs = 5 - encoder_1 = architectures.Encoder(latent_dims=10, feature_size=10) - encoder_2 = architectures.Encoder(latent_dims=10, feature_size=12) + encoder_1 = architectures.CNNEncoder(latent_dims=10, feature_size=(16, 16)) + encoder_2 = architectures.CNNEncoder(latent_dims=10, feature_size=(16, 16)) dtcca = DTCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2]) dtcca = CCALightning(dtcca) trainer = pl.Trainer(max_epochs=epochs, 
enable_checkpointing=False) @@ -166,17 +166,17 @@ def test_DTCCA_methods(): def test_DCCAE_methods(): latent_dims = 2 - encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=10) - encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=12) - decoder_1 = architectures.Decoder(latent_dims=latent_dims, feature_size=10) - decoder_2 = architectures.Decoder(latent_dims=latent_dims, feature_size=12) + encoder_1 = architectures.CNNEncoder(latent_dims=latent_dims, feature_size=(16, 16)) + encoder_2 = architectures.CNNEncoder(latent_dims=latent_dims, feature_size=(16, 16)) + decoder_1 = architectures.CNNDecoder(latent_dims=latent_dims, feature_size=(16, 16)) + decoder_2 = architectures.CNNDecoder(latent_dims=latent_dims, feature_size=(16, 16)) # SplitAE splitae = SplitAE( latent_dims=latent_dims, encoder=encoder_1, decoders=[decoder_1, decoder_2] ) splitae = CCALightning(splitae) trainer = pl.Trainer(max_epochs=5, enable_checkpointing=False) - trainer.fit(splitae, train_loader) + trainer.fit(splitae, conv_loader) # DCCAE dccae = DCCAE( latent_dims=latent_dims, @@ -185,7 +185,7 @@ def test_DCCAE_methods(): ) dccae = CCALightning(dccae) trainer = pl.Trainer(max_epochs=5, enable_checkpointing=False) - trainer.fit(dccae, train_loader) + trainer.fit(dccae, conv_loader) def test_DVCCA_p_methods(): From 36fe11c2af9d8407ff1a411b0f707bbe406323e7 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 16:16:26 +0000 Subject: [PATCH 30/45] Removing brainnet to avoid feature creep --- examples/plot_dcca_custom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/plot_dcca_custom.py b/examples/plot_dcca_custom.py index 4f89090a..8827cd81 100644 --- a/examples/plot_dcca_custom.py +++ b/examples/plot_dcca_custom.py @@ -34,6 +34,6 @@ dcca = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2]) optimizer = optim.Adam(dcca.parameters(), lr=1e-3) scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, 1) -dcca = CCALightning(dcca, optimizer=optimizer, scheduler=scheduler) +dcca = CCALightning(dcca, optimizer=optimizer, lr_scheduler=scheduler) trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False) trainer.fit(dcca, train_loader, val_loader) From a662f11b712b9c210bb89af1073bf7bc4254da20 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 18:14:05 +0000 Subject: [PATCH 31/45] Removing brainnet to avoid feature creep --- cca_zoo/data/toy.py | 56 ++------------------------------ cca_zoo/deepmodels/_dcca_base.py | 2 +- cca_zoo/deepmodels/dccae.py | 12 +++++-- cca_zoo/deepmodels/dvcca.py | 16 ++++----- cca_zoo/deepmodels/splitae.py | 14 ++++++-- cca_zoo/deepmodels/trainers.py | 32 +++++++++--------- cca_zoo/deepmodels/utils.py | 12 +++---- cca_zoo/test/test_deepmodels.py | 2 +- examples/__init__.py | 0 examples/plot_dcca.py | 16 ++++++++- examples/plot_dvcca.py | 52 +++++++++++++++++------------ examples/utils.py | 38 ++++++++++++++++++++++ 12 files changed, 140 insertions(+), 112 deletions(-) create mode 100644 examples/__init__.py create mode 100644 examples/utils.py diff --git a/cca_zoo/data/toy.py b/cca_zoo/data/toy.py index cd4c70bf..40911696 100644 --- a/cca_zoo/data/toy.py +++ b/cca_zoo/data/toy.py @@ -125,7 +125,7 @@ def __getitem__(self, idx): x_a = transforms.functional.rotate( x_a, rot_a.item(), interpolation=InterpolationMode.BILINEAR ) - x_a = self.base_transform(x_a) # convert from PIL back to pytorch tensor + x_a = self.base_transform(x_a) label = self.targets[idx] # get random index of image with 
same class @@ -140,26 +140,6 @@ def __getitem__(self, idx): x_b = torch.flatten(x_b) return (x_b, x_a), (rot_a, label) - def to_numpy(self, indices=None): - """ - Converts dataset to numpy array form - - :param indices: indices of the samples to extract into numpy arrays - """ - if indices is None: - indices = np.arange(self.__len__()) - view_1 = np.zeros((len(indices), 784)) - view_2 = np.zeros((len(indices), 784)) - labels = np.zeros(len(indices)).astype(int) - rotations = np.zeros(len(indices)) - for i, n in enumerate(indices): - sample = self[n] - view_1[i] = sample[0][0].numpy().reshape((-1, 28 * 28)) - view_2[i] = sample[0][1].numpy().reshape((-1, 28 * 28)) - rotations[i] = sample[1][0].numpy() - labels[i] = sample[1][1].numpy().astype(int) - return (view_1, view_2), (rotations, labels) - class Tangled_MNIST_Dataset(Dataset): """ @@ -185,7 +165,6 @@ def __init__(self, mnist_type="MNIST", train=True, flatten=True): self.data = self.dataset.data self.transform = transforms.Compose([transforms.ToTensor()]) self.targets = self.dataset.targets - self.OHs = _OH_digits(self.targets.numpy().astype(int)) self.filtered_classes = [] self.filtered_nums = [] for i in range(10): @@ -222,38 +201,7 @@ def __getitem__(self, idx): x_b_rotate = torch.flatten(x_b_rotate) return (x_a_rotate, x_b_rotate), (rot_a, rot_b, label) - def to_numpy(self, indices): - """ - Converts dataset to numpy array form - - :param indices: indices of the samples to extract into numpy arrays - """ - view_1 = np.zeros((len(indices), 784)) - view_2 = np.zeros((len(indices), 784)) - labels = np.zeros(len(indices)).astype(int) - rotation_1 = np.zeros(len(indices)) - rotation_2 = np.zeros(len(indices)) - for i, n in enumerate(indices): - sample = self[n] - view_1[i] = sample[0][0].numpy().reshape((-1, 28 * 28)) - view_2[i] = sample[0][1].numpy().reshape((-1, 28 * 28)) - rotation_1[i] = sample[1][0].numpy() - rotation_2[i] = sample[1][1].numpy() - labels[i] = sample[1][2].numpy().astype(int) - return (view_1, view_2), (rotation_1, rotation_2, labels) - - -def _OH_digits(digits): - """ - One hot encode numpy array - - :param digits: - """ - b = np.zeros((digits.size, digits.max() + 1)) - b[np.arange(digits.size), digits] = 1 - return b - def _add_mnist_noise(x): - x = x + torch.rand(28, 28) + x = x + torch.rand(size=(28, 28)) return x diff --git a/cca_zoo/deepmodels/_dcca_base.py b/cca_zoo/deepmodels/_dcca_base.py index 7ad94a86..b2c3fd66 100644 --- a/cca_zoo/deepmodels/_dcca_base.py +++ b/cca_zoo/deepmodels/_dcca_base.py @@ -33,4 +33,4 @@ def post_transform(self, *z_list, train=False) -> Iterable[np.ndarray]: :param z_list: a list of all of the latent space embeddings for each view :param train: if the train flag is True this fits a new post transformation """ - return z_list + return z_list \ No newline at end of file diff --git a/cca_zoo/deepmodels/dccae.py b/cca_zoo/deepmodels/dccae.py index 0a167e87..3973ceaa 100644 --- a/cca_zoo/deepmodels/dccae.py +++ b/cca_zoo/deepmodels/dccae.py @@ -53,7 +53,15 @@ def forward(self, *args): z.append(encoder(args[i])) return z - def decode(self, *z): + def recon(self, *args): + """ + :param args: + :return: + """ + z = self(*args) + return self._decode(*z) + + def _decode(self, *z): """ This method is used to decode from the latent space to the best prediction of the original views @@ -65,7 +73,7 @@ def decode(self, *z): def loss(self, *args): z = self(*args) - recon = self.decode(*z) + recon = self._decode(*z) recon_loss = self._recon_loss(args[: len(recon)], recon) return self.lam * 
recon_loss + self.objective.loss(*z) diff --git a/cca_zoo/deepmodels/dvcca.py b/cca_zoo/deepmodels/dvcca.py index b6fac805..3967e31d 100644 --- a/cca_zoo/deepmodels/dvcca.py +++ b/cca_zoo/deepmodels/dvcca.py @@ -23,11 +23,11 @@ class DVCCA(_DCCA_base): """ def __init__( - self, - latent_dims: int, - encoders=None, - decoders=None, - private_encoders: Iterable[BaseEncoder] = None, + self, + latent_dims: int, + encoders=None, + decoders=None, + private_encoders: Iterable[BaseEncoder] = None, ): """ :param latent_dims: # latent dimensions @@ -69,8 +69,8 @@ def forward(self, *args, mle=True): z_p = mu_p else: z_dist = dist.Normal(mu_p, torch.exp(0.5 * logvar_p)) - z = z_dist.rsample() - z = [torch.cat([z_] + z_p, dim=-1) for z_ in z] + z_p = z_dist.rsample() + z = [torch.cat((z_, z_p_), dim=-1) for z_, z_p_ in zip(z, z_p)] return z def _encode(self, *args): @@ -116,7 +116,7 @@ def recon(self, *args): :return: """ z = self(*args) - return [self._decode(z_i) for z_i in z][0] + return [self._decode(z_) for z_ in z] def loss(self, *args): """ diff --git a/cca_zoo/deepmodels/splitae.py b/cca_zoo/deepmodels/splitae.py index a71a2a73..707ab1db 100644 --- a/cca_zoo/deepmodels/splitae.py +++ b/cca_zoo/deepmodels/splitae.py @@ -30,9 +30,17 @@ def __init__(self, latent_dims: int, encoder: BaseEncoder = Encoder, decoders=No def forward(self, *args): z = self.encoder(args[0]) - return [z] + return z - def decode(self, z): + def recon(self, *args): + """ + :param args: + :return: + """ + z = self(*args) + return self._decode(z) + + def _decode(self, z): """ This method is used to decode from the latent space to the best prediction of the original views @@ -45,7 +53,7 @@ def decode(self, z): def loss(self, *args): z = self(*args) - recon = self.decode(*z) + recon = self._decode(z) recon_loss = self.recon_loss(args, recon) return recon_loss diff --git a/cca_zoo/deepmodels/trainers.py b/cca_zoo/deepmodels/trainers.py index 25dbc906..754ffbc0 100644 --- a/cca_zoo/deepmodels/trainers.py +++ b/cca_zoo/deepmodels/trainers.py @@ -72,9 +72,9 @@ def on_validation_epoch_end(self, unused: Optional = None) -> None: self.log("val corr", score) def correlations( - self, - loader: torch.utils.data.DataLoader, - train: bool = False, + self, + loader: torch.utils.data.DataLoader, + train: bool = False, ): """ @@ -87,16 +87,16 @@ def correlations( return None all_corrs = [] for x, y in itertools.product(transformed_views, repeat=2): - all_corrs.append(np.diag(np.corrcoef(x.T, y.T)[: x.shape[1], y.shape[1] :])) + all_corrs.append(np.diag(np.corrcoef(x.T, y.T)[: x.shape[1], y.shape[1]:])) all_corrs = np.array(all_corrs).reshape( (len(transformed_views), len(transformed_views), -1) ) return all_corrs def transform( - self, - loader: torch.utils.data.DataLoader, - train: bool = False, + self, + loader: torch.utils.data.DataLoader, + train: bool = False, ): """ @@ -115,13 +115,13 @@ def transform( np.append(z_list[i], z_i.detach().cpu().numpy(), axis=0) for i, z_i in enumerate(z) ] - z_list = self.model.post_transform(*z_list, train=train) + z_list = self.model.post_transform(z_list, train=train) return z_list def score( - self, - loader: torch.utils.data.DataLoader, - train: bool = False, + self, + loader: torch.utils.data.DataLoader, + train: bool = False, ): """ @@ -136,13 +136,13 @@ def score( n_views = pair_corrs.shape[0] # sum all the pairwise correlations for each dimension. Subtract the self correlations. Divide by the number of views. 
Gives average correlation dim_corrs = ( - pair_corrs.sum(axis=tuple(range(pair_corrs.ndim - 1))) - n_views - ) / (n_views ** 2 - n_views) + pair_corrs.sum(axis=tuple(range(pair_corrs.ndim - 1))) - n_views + ) / (n_views ** 2 - n_views) return dim_corrs - def predict_view( - self, - loader: torch.utils.data.DataLoader, + def recon( + self, + loader: torch.utils.data.DataLoader, ): with torch.no_grad(): for batch_idx, (data, label) in enumerate(loader): diff --git a/cca_zoo/deepmodels/utils.py b/cca_zoo/deepmodels/utils.py index 8e23e120..8c04edc3 100644 --- a/cca_zoo/deepmodels/utils.py +++ b/cca_zoo/deepmodels/utils.py @@ -8,11 +8,11 @@ def process_data( - dataset: Union[torch.utils.data.Dataset, Iterable[np.ndarray]], - val_dataset: Union[torch.utils.data.Dataset, Iterable[np.ndarray]] = None, - labels=None, - val_labels=None, - val_split: float = 0, + dataset: Union[torch.utils.data.Dataset, Iterable[np.ndarray]], + val_dataset: Union[torch.utils.data.Dataset, Iterable[np.ndarray]] = None, + labels=None, + val_labels=None, + val_split: float = 0, ): # Ensure datasets are in the right form (e.g. if numpy arrays are passed turn them into if isinstance(dataset, tuple): @@ -29,7 +29,7 @@ def process_data( def get_dataloaders( - dataset, val_dataset=None, batch_size=None, val_batch_size=None, num_workers=0 + dataset, val_dataset=None, batch_size=None, val_batch_size=None, num_workers=0 ): if batch_size is None: batch_size = len(dataset) diff --git a/cca_zoo/test/test_deepmodels.py b/cca_zoo/test/test_deepmodels.py index b301463e..a1fc4dec 100644 --- a/cca_zoo/test/test_deepmodels.py +++ b/cca_zoo/test/test_deepmodels.py @@ -161,7 +161,7 @@ def test_DTCCA_methods(): dtcca = DTCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2]) dtcca = CCALightning(dtcca) trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False) - trainer.fit(dtcca, train_loader) + trainer.fit(dtcca, conv_loader) def test_DCCAE_methods(): diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/examples/plot_dcca.py b/examples/plot_dcca.py index 16da6ac0..a77f415c 100644 --- a/examples/plot_dcca.py +++ b/examples/plot_dcca.py @@ -7,6 +7,7 @@ import numpy as np import pytorch_lightning as pl +from matplotlib import pyplot as plt from torch.utils.data import Subset # %% @@ -20,6 +21,7 @@ DCCA_SDL, BarlowTwins, ) +from examples.utils import plot_latent_label n_train = 500 n_val = 100 @@ -31,7 +33,7 @@ # The number of latent dimensions across models latent_dims = 2 # number of epochs for deep models -epochs = 10 +epochs = 20 encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=392) encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=392) @@ -42,6 +44,9 @@ dcca = CCALightning(dcca) trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False) trainer.fit(dcca, train_loader, val_loader) +plot_latent_label(dcca.model, train_loader) +plt.suptitle('DCCA') +plt.show() # %% # Deep CCA by Non-Linear Orthogonal Iterations @@ -51,6 +56,9 @@ dcca_noi = CCALightning(dcca_noi) trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False) trainer.fit(dcca_noi, train_loader, val_loader) +plot_latent_label(dcca_noi.model, train_loader) +plt.title('DCCA by Non-Linear Orthogonal Iterations') +plt.show() # %% # Deep CCA by Stochastic Decorrelation Loss @@ -60,6 +68,9 @@ dcca_sdl = CCALightning(dcca_sdl) trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False) trainer.fit(dcca_sdl, train_loader, val_loader) 
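+# plot_latent_label (added to examples/utils.py in this patch set) scatters the two
+# views' projections against each other for each latent dimension, coloured by label.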
+plot_latent_label(dcca_sdl.model, train_loader)
+plt.title('DCCA by Stochastic Decorrelation')
+plt.show()

 # %%
 # Deep CCA by Barlow Twins
@@ -67,3 +78,6 @@ barlowtwins = CCALightning(barlowtwins)
 trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False)
 trainer.fit(barlowtwins, train_loader, val_loader)
+plot_latent_label(barlowtwins.model, train_loader)
+plt.title('DCCA by Barlow Twins')
+plt.show()
diff --git a/examples/plot_dvcca.py b/examples/plot_dvcca.py
index f039c2e2..66acc42b 100644
--- a/examples/plot_dvcca.py
+++ b/examples/plot_dvcca.py
@@ -5,12 +5,13 @@
 This example demonstrates multiview models which can reconstruct their inputs
 """

+import matplotlib.pyplot as plt
 import numpy as np
 import pytorch_lightning as pl
 from torch.utils.data import Subset

 # %%
-from cca_zoo.data import Split_MNIST_Dataset
+from cca_zoo.data import Noisy_MNIST_Dataset
 from cca_zoo.deepmodels import (
     CCALightning,
     get_dataloaders,
     architectures,
     DCCAE,
     DVCCA,
 )
+from examples.utils import plot_reconstruction

 n_train = 500
 n_val = 100
-train_dataset = Split_MNIST_Dataset(mnist_type="MNIST", train=True)
+train_dataset = Noisy_MNIST_Dataset(mnist_type="MNIST", train=True, flatten=False)
 val_dataset = Subset(train_dataset, np.arange(n_train, n_train + n_val))
 train_dataset = Subset(train_dataset, np.arange(n_train))
 train_loader, val_loader = get_dataloaders(train_dataset, val_dataset)
@@ -30,15 +32,17 @@
 latent_dims = 2
 # number of epochs for deep models
 epochs = 10
+# channels in encoders and decoders
+channels = [16, 16]

-encoder_1 = architectures.Encoder(
-    latent_dims=latent_dims, feature_size=392, variational=True
+encoder_1 = architectures.CNNEncoder(
+    latent_dims=latent_dims, feature_size=(28, 28), variational=True, channels=channels,
 )
-encoder_2 = architectures.Encoder(
-    latent_dims=latent_dims, feature_size=392, variational=True
+encoder_2 = architectures.CNNEncoder(
+    latent_dims=latent_dims, feature_size=(28, 28), variational=True, channels=channels,
 )
-decoder_1 = architectures.Decoder(latent_dims=latent_dims, feature_size=392)
-decoder_2 = architectures.Decoder(latent_dims=latent_dims, feature_size=392)
+decoder_1 = architectures.CNNDecoder(latent_dims=latent_dims, feature_size=(28, 28))
+decoder_2 = architectures.CNNDecoder(latent_dims=latent_dims, feature_size=(28, 28))

 # %%
 # Deep VCCA
@@ -50,19 +54,21 @@
 dcca = CCALightning(dcca)
 trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False)
 trainer.fit(dcca, train_loader, val_loader)
+plot_reconstruction(dcca.model, train_dataset, 0)
+plt.suptitle('DVCCA')
+plt.show()

 # %%
 # Deep VCCA (private)
 # We need to add additional private encoders and change (double) the dimensionality of the decoders.
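+# Each view then keeps the shared latent plus its own private latent, which is why
+# the decoders below take 2 * latent_dims inputs.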
-private_encoder_1 = architectures.Encoder( - latent_dims=latent_dims, feature_size=392, variational=True +private_encoder_1 = architectures.CNNEncoder( + latent_dims=latent_dims, feature_size=(28, 28), variational=True, channels=channels, ) -private_encoder_2 = architectures.Encoder( - latent_dims=latent_dims, feature_size=392, variational=True +private_encoder_2 = architectures.CNNEncoder( + latent_dims=latent_dims, feature_size=(28, 28), variational=True, channels=channels, ) -private_decoder_1 = architectures.Decoder(latent_dims=2 * latent_dims, feature_size=392) -private_decoder_2 = architectures.Decoder(latent_dims=2 * latent_dims, feature_size=392) - +private_decoder_1 = architectures.CNNDecoder(latent_dims=2 * latent_dims, feature_size=(28, 28)) +private_decoder_2 = architectures.CNNDecoder(latent_dims=2 * latent_dims, feature_size=(28, 28)) dcca = DVCCA( latent_dims=latent_dims, encoders=[encoder_1, encoder_2], @@ -72,16 +78,22 @@ dcca = CCALightning(dcca) trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False) trainer.fit(dcca, train_loader, val_loader) +plot_reconstruction(dcca.model, train_dataset, 0) +plt.suptitle('DVCCA Private') +plt.show() # %% # DCCAE -encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=392) -encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=392) -dccae_model = DCCAE( +encoder_1 = architectures.CNNEncoder(latent_dims=latent_dims, feature_size=(28, 28), channels=channels, ) +encoder_2 = architectures.CNNEncoder(latent_dims=latent_dims, feature_size=(28, 28), channels=channels, ) +dcca = DCCAE( latent_dims=latent_dims, encoders=[encoder_1, encoder_2], decoders=[decoder_1, decoder_2], ) -dccae_model = CCALightning(dccae_model) +dcca = CCALightning(dcca) trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False) -trainer.fit(dccae_model, train_loader, val_loader) +trainer.fit(dcca, train_loader, val_loader) +plot_reconstruction(dcca.model, train_dataset, 0) +plt.suptitle('DCCAE') +plt.show() diff --git a/examples/utils.py b/examples/utils.py new file mode 100644 index 00000000..26578f94 --- /dev/null +++ b/examples/utils.py @@ -0,0 +1,38 @@ +import matplotlib.pyplot as plt + + +def plot_reconstruction(model, dataset, idx): + (x, y), _ = dataset[idx] + recon_x, recon_y = model.recon(x[None, :, :, :], y[None, :, :, :]) + if isinstance(recon_x, list): + recon_x = recon_x[0] + recon_y = recon_y[0] + recon_x = recon_x.detach().numpy() + recon_y = recon_y.detach().numpy() + fig, ax = plt.subplots(ncols=4) + ax[0].set_title('Original View 1') + ax[1].set_title('Original View 2') + ax[2].set_title('Reconstruction View 1') + ax[3].set_title('Reconstruction View 2') + ax[0].imshow(x[0].detach().numpy()) + ax[1].imshow(y[0].detach().numpy()) + ax[2].imshow(recon_x[0, 0]) + ax[3].imshow(recon_y[0, 0]) + + +def plot_latent_label(model, dataloader, num_batches=100): + fig, ax = plt.subplots(ncols=model.latent_dims) + for j in range(model.latent_dims): + ax[j].set_title(f'Dimension {j}') + ax[j].set_xlabel('View 1') + ax[j].set_ylabel('View 2') + for i, (data, label) in enumerate(dataloader): + z = model(*data) + zx, zy = z + zx = zx.to('cpu').detach().numpy() + zy = zy.to('cpu').detach().numpy() + for j in range(model.latent_dims): + ax[j].scatter(zx[:, j], zy[:, j], c=label.numpy(), cmap='tab10') + if i > num_batches: + plt.colorbar() + break From a7d5f6e5f6cc29059f96035a8dfad4eee058bf32 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 18:49:33 +0000 Subject: [PATCH 32/45] Removing brainnet 
to avoid feature creep

---
 cca_zoo/data/toy.py    | 112 ++++++++++++++++++-----------------
 examples/plot_dvcca.py |  34 ++++++-------
 examples/utils.py      |  10 ++--
 3 files changed, 70 insertions(+), 86 deletions(-)

diff --git a/cca_zoo/data/toy.py b/cca_zoo/data/toy.py
index 40911696..e91e79da 100644
--- a/cca_zoo/data/toy.py
+++ b/cca_zoo/data/toy.py
@@ -4,10 +4,9 @@
 import numpy as np
 import torch
 import torch.utils.data
-from PIL import Image
+import torchvision
 from torch.utils.data import Dataset
 from torchvision import datasets, transforms
-from torchvision.transforms.functional import InterpolationMode


 class Split_MNIST_Dataset(Dataset):
@@ -16,7 +15,7 @@ class Split_MNIST_Dataset(Dataset):
     """

     def __init__(
-        self, mnist_type: str = "MNIST", train: bool = True, flatten: bool = True
+            self, mnist_type: str = "MNIST", train: bool = True, flatten: bool = True
     ):
         """

@@ -72,7 +71,7 @@ class Noisy_MNIST_Dataset(Dataset):
     """

     def __init__(
-        self, mnist_type: str = "MNIST", train: bool = True, flatten: bool = True
+            self, mnist_type: str = "MNIST", train: bool = True, flatten: bool = True
     ):
         """

@@ -81,25 +80,32 @@ def __init__(
         :param flatten: whether to flatten the data into array or use 2d images
         """
         if mnist_type == "MNIST":
-            self.dataset = datasets.MNIST("../../data", train=train, download=True)
+            self.dataset = datasets.MNIST("../../data", train=train, download=True,
+                                          transform=torchvision.transforms.Compose([
+                                              torchvision.transforms.ToTensor()]))
         elif mnist_type == "FashionMNIST":
             self.dataset = datasets.FashionMNIST(
-                "../../data", train=train, download=True
-            )
+                "../../data", train=train, download=True, transform=torchvision.transforms.Compose([
+                    torchvision.transforms.ToTensor()]))
         elif mnist_type == "KMNIST":
-            self.dataset = datasets.KMNIST("../../data", train=train, download=True)
+            self.dataset = datasets.KMNIST("../../data", train=train, download=True,
+                                           transform=torchvision.transforms.Compose([
+                                               torchvision.transforms.ToTensor(),
+                                               transforms.Normalize((0.1307,), (0.3081,))]))

-        self.data = self.dataset.data
         self.base_transform = transforms.ToTensor()
         self.a_transform = transforms.Compose(
             [
-                transforms.ToTensor(),  # first, convert image to PyTorch tensor
-                transforms.ToPILImage(),
+                torchvision.transforms.RandomRotation((-45, 45))
             ]
         )
         self.b_transform = transforms.Compose(
             [
-                transforms.ToTensor(),
                 transforms.Lambda(_add_mnist_noise),
                 transforms.Lambda(self.__threshold_func__),
             ]
         )
@@ -108,8 +114,7 @@ def __init__(
         self.filtered_classes = []
         self.filtered_nums = []
         for i in range(10):
-            self.filtered_classes.append(self.data[self.targets == i])
-            self.filtered_nums.append(self.filtered_classes[i].shape[0])
+            self.filtered_nums.append(np.where(self.targets == i)[0])
         self.flatten = flatten

     def __threshold_func__(self, x):
@@ -117,28 +122,18 @@ def __threshold_func__(self, x):
         return x

     def __len__(self):
-        return len(self.data)
+        return len(self.dataset)

     def __getitem__(self, idx):
-        x_a = self.a_transform(self.data[idx].numpy() / 255)
-        rot_a = torch.rand(1) * 90 - 45
-        x_a = transforms.functional.rotate(
-            x_a, rot_a.item(), interpolation=InterpolationMode.BILINEAR
-        )
-        x_a = self.base_transform(x_a)
-
-        label = self.targets[idx]
+        x_a, label = self.dataset[idx]
+        x_a = self.a_transform(x_a)
         # get random index of image with same class
-        random_index = np.random.randint(self.filtered_nums[label])
-        x_b = Image.fromarray(
self.filtered_classes[label][random_index, :, :].numpy() / 255, mode="L" - ) - x_b = self.b_transform(x_b) - + random_index = np.random.choice(self.filtered_nums[label]) + x_b = self.b_transform(self.dataset[random_index][0]) if self.flatten: x_a = torch.flatten(x_a) x_b = torch.flatten(x_b) - return (x_b, x_a), (rot_a, label) + return (x_b, x_a), label class Tangled_MNIST_Dataset(Dataset): @@ -154,54 +149,45 @@ def __init__(self, mnist_type="MNIST", train=True, flatten=True): :param flatten: whether to flatten the data into array or use 2d images """ if mnist_type == "MNIST": - self.dataset = datasets.MNIST("../../data", train=train, download=True) + self.dataset = datasets.MNIST("../../data", train=train, download=True, + transform=torchvision.transforms.Compose([ + torchvision.transforms.ToTensor()])) elif mnist_type == "FashionMNIST": self.dataset = datasets.FashionMNIST( - "../../data", train=train, download=True - ) + "../../data", train=train, download=True, transform=torchvision.transforms.Compose([ + torchvision.transforms.ToTensor()])) elif mnist_type == "KMNIST": - self.dataset = datasets.KMNIST("../../data", train=train, download=True) - - self.data = self.dataset.data - self.transform = transforms.Compose([transforms.ToTensor()]) + self.dataset = datasets.KMNIST("../../data", train=train, download=True, + transform=torchvision.transforms.Compose([ + torchvision.transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,))])) + self.transform = transforms.Compose( + [ + torchvision.transforms.RandomRotation((-45, 45)) + ] + ) self.targets = self.dataset.targets self.filtered_classes = [] self.filtered_nums = [] for i in range(10): - self.filtered_classes.append(self.data[self.targets == i]) - self.filtered_nums.append(self.filtered_classes[i].shape[0]) + self.filtered_nums.append(np.where(self.targets == i)[0]) self.flatten = flatten def __len__(self): - return len(self.data) + return len(self.dataset) def __getitem__(self, idx): - # get first image from idx and second of same class - label = self.targets[idx] - x_a = Image.fromarray(self.data[idx].numpy() / 255, mode="L") + x_a, label = self.dataset[idx] + x_a = self.transform(x_a) # get random index of image with same class - random_index = np.random.randint(self.filtered_nums[label]) - x_b = Image.fromarray( - self.filtered_classes[label][random_index, :, :].numpy() / 255, mode="L" - ) - # get random angles of rotation - rot_a, rot_b = torch.rand(2) * 90 - 45 - x_a_rotate = transforms.functional.rotate( - x_a, rot_a.item(), interpolation=InterpolationMode.BILINEAR - ) - x_b_rotate = transforms.functional.rotate( - x_b, rot_b.item(), interpolation=InterpolationMode.BILINEAR - ) - # convert images to tensors - x_a_rotate = self.transform(x_a_rotate) - x_b_rotate = self.transform(x_b_rotate) - + random_index = np.random.choice(self.filtered_nums[label]) + x_b = self.transform(self.dataset[random_index][0]) if self.flatten: - x_a_rotate = torch.flatten(x_a_rotate) - x_b_rotate = torch.flatten(x_b_rotate) - return (x_a_rotate, x_b_rotate), (rot_a, rot_b, label) + x_a = torch.flatten(x_a) + x_b = torch.flatten(x_b) + return (x_b, x_a), label def _add_mnist_noise(x): - x = x + torch.rand(size=(28, 28)) + x = x + torch.rand(28, 28) / 10 return x diff --git a/examples/plot_dvcca.py b/examples/plot_dvcca.py index 66acc42b..89c7a885 100644 --- a/examples/plot_dvcca.py +++ b/examples/plot_dvcca.py @@ -23,7 +23,7 @@ n_train = 500 n_val = 100 -train_dataset = Noisy_MNIST_Dataset(mnist_type="MNIST", train=True, flatten=False) 
+train_dataset = Noisy_MNIST_Dataset(mnist_type="MNIST", train=True, flatten=True) val_dataset = Subset(train_dataset, np.arange(n_train, n_train + n_val)) train_dataset = Subset(train_dataset, np.arange(n_train)) train_loader, val_loader = get_dataloaders(train_dataset, val_dataset) @@ -31,18 +31,16 @@ # The number of latent dimensions across models latent_dims = 2 # number of epochs for deep models -epochs = 10 -# channels in encoders and decoders -channels = [16, 16] +epochs = 20 -encoder_1 = architectures.CNNEncoder( - latent_dims=latent_dims, feature_size=(28, 28), variational=True, channels=channels, +encoder_1 = architectures.Encoder( + latent_dims=latent_dims, feature_size=784, variational=True ) -encoder_2 = architectures.CNNEncoder( - latent_dims=latent_dims, feature_size=(28, 28), variational=True, channels=channels, +encoder_2 = architectures.Encoder( + latent_dims=latent_dims, feature_size=784, variational=True ) -decoder_1 = architectures.CNNDecoder(latent_dims=latent_dims, feature_size=(28, 28)) -decoder_2 = architectures.CNNDecoder(latent_dims=latent_dims, feature_size=(28, 28)) +decoder_1 = architectures.Decoder(latent_dims=latent_dims, feature_size=784) +decoder_2 = architectures.Decoder(latent_dims=latent_dims, feature_size=784) # %% # Deep VCCA @@ -61,14 +59,14 @@ # %% # Deep VCCA (private) # We need to add additional private encoders and change (double) the dimensionality of the decoders. -private_encoder_1 = architectures.CNNEncoder( - latent_dims=latent_dims, feature_size=(28, 28), variational=True, channels=channels, +private_encoder_1 = architectures.Encoder( + latent_dims=latent_dims, feature_size=784, variational=True ) -private_encoder_2 = architectures.CNNEncoder( - latent_dims=latent_dims, feature_size=(28, 28), variational=True, channels=channels, +private_encoder_2 = architectures.Encoder( + latent_dims=latent_dims, feature_size=784, variational=True ) -private_decoder_1 = architectures.CNNDecoder(latent_dims=2 * latent_dims, feature_size=(28, 28)) -private_decoder_2 = architectures.CNNDecoder(latent_dims=2 * latent_dims, feature_size=(28, 28)) +private_decoder_1 = architectures.Decoder(latent_dims=2 * latent_dims, feature_size=784) +private_decoder_2 = architectures.Decoder(latent_dims=2 * latent_dims, feature_size=784) dcca = DVCCA( latent_dims=latent_dims, encoders=[encoder_1, encoder_2], @@ -84,8 +82,8 @@ # %% # DCCAE -encoder_1 = architectures.CNNEncoder(latent_dims=latent_dims, feature_size=(28, 28), channels=channels, ) -encoder_2 = architectures.CNNEncoder(latent_dims=latent_dims, feature_size=(28, 28), channels=channels, ) +encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=784) +encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=784) dcca = DCCAE( latent_dims=latent_dims, encoders=[encoder_1, encoder_2], diff --git a/examples/utils.py b/examples/utils.py index 26578f94..1fe79863 100644 --- a/examples/utils.py +++ b/examples/utils.py @@ -3,7 +3,7 @@ def plot_reconstruction(model, dataset, idx): (x, y), _ = dataset[idx] - recon_x, recon_y = model.recon(x[None, :, :, :], y[None, :, :, :]) + recon_x, recon_y = model.recon(x, y) if isinstance(recon_x, list): recon_x = recon_x[0] recon_y = recon_y[0] @@ -14,10 +14,10 @@ def plot_reconstruction(model, dataset, idx): ax[1].set_title('Original View 2') ax[2].set_title('Reconstruction View 1') ax[3].set_title('Reconstruction View 2') - ax[0].imshow(x[0].detach().numpy()) - ax[1].imshow(y[0].detach().numpy()) - ax[2].imshow(recon_x[0, 0]) - ax[3].imshow(recon_y[0, 0]) 
+ ax[0].imshow(x.detach().numpy().reshape((28, 28))) + ax[1].imshow(y.detach().numpy().reshape((28, 28))) + ax[2].imshow(recon_x.reshape((28, 28))) + ax[3].imshow(recon_y.reshape((28, 28))) def plot_latent_label(model, dataloader, num_batches=100): From 0b6a639c9b20d91e64fdb4a6deeacec7c52d0575 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 18:51:19 +0000 Subject: [PATCH 33/45] Removing brainnet to avoid feature creep --- cca_zoo/deepmodels/dccae.py | 2 +- cca_zoo/deepmodels/dvcca.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cca_zoo/deepmodels/dccae.py b/cca_zoo/deepmodels/dccae.py index 3973ceaa..499a17e0 100644 --- a/cca_zoo/deepmodels/dccae.py +++ b/cca_zoo/deepmodels/dccae.py @@ -80,6 +80,6 @@ def loss(self, *args): @staticmethod def _recon_loss(x, recon): recons = [ - F.mse_loss(recon_, x_, reduction="mean") for recon_, x_ in zip(recon, x) + F.binary_cross_entropy(recon_, x_, reduction="mean") for recon_, x_ in zip(recon, x) ] return torch.stack(recons).sum(dim=0) diff --git a/cca_zoo/deepmodels/dvcca.py b/cca_zoo/deepmodels/dvcca.py index 3967e31d..e50728de 100644 --- a/cca_zoo/deepmodels/dvcca.py +++ b/cca_zoo/deepmodels/dvcca.py @@ -155,7 +155,7 @@ def vcca_loss(self, *args, mu, logvar): recons = self._decode(z) bces = torch.stack( [ - F.binary_cross_entropy(recon, arg, reduction="sum") / batch_n + F.binary_cross_entropy(recon, arg, reduction="mean") for recon, arg in zip(recons, args) ] ).sum() From 83b27c864f6a7c42bf633f76710c063c5636ecc8 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 18:59:41 +0000 Subject: [PATCH 34/45] Removing brainnet to avoid feature creep --- cca_zoo/deepmodels/__init__.py | 2 +- examples/plot_dvcca.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cca_zoo/deepmodels/__init__.py b/cca_zoo/deepmodels/__init__.py index 97d43dec..bb50f89f 100644 --- a/cca_zoo/deepmodels/__init__.py +++ b/cca_zoo/deepmodels/__init__.py @@ -10,4 +10,4 @@ from .dvcca import DVCCA from .splitae import SplitAE from .trainers import CCALightning -from .utils import get_dataloaders, process_data +from .utils import get_dataloaders, process_data \ No newline at end of file diff --git a/examples/plot_dvcca.py b/examples/plot_dvcca.py index 89c7a885..83474147 100644 --- a/examples/plot_dvcca.py +++ b/examples/plot_dvcca.py @@ -39,8 +39,8 @@ encoder_2 = architectures.Encoder( latent_dims=latent_dims, feature_size=784, variational=True ) -decoder_1 = architectures.Decoder(latent_dims=latent_dims, feature_size=784) -decoder_2 = architectures.Decoder(latent_dims=latent_dims, feature_size=784) +decoder_1 = architectures.Decoder(latent_dims=latent_dims, feature_size=784, norm_output=True) +decoder_2 = architectures.Decoder(latent_dims=latent_dims, feature_size=784, norm_output=True) # %% # Deep VCCA From 38bbe303d0453c233879659e8413244ba1f8b0f5 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 19:14:10 +0000 Subject: [PATCH 35/45] Removing brainnet to avoid feature creep --- cca_zoo/deepmodels/_dcca_base.py | 4 ++-- cca_zoo/deepmodels/dcca.py | 2 +- cca_zoo/deepmodels/dtcca.py | 5 ++--- cca_zoo/deepmodels/splitae.py | 8 +++---- examples/plot_dcca.py | 20 ++++++++++++++++- examples/plot_dvcca.py | 21 +++++++++++++++++- examples/utils.py | 38 -------------------------------- 7 files changed, 48 insertions(+), 50 deletions(-) delete mode 100644 examples/utils.py diff --git a/cca_zoo/deepmodels/_dcca_base.py b/cca_zoo/deepmodels/_dcca_base.py index b2c3fd66..f3463756 100644 --- 
a/cca_zoo/deepmodels/_dcca_base.py
+++ b/cca_zoo/deepmodels/_dcca_base.py
@@ -26,11 +26,11 @@ def loss(self, *args, **kwargs):
         """
         raise NotImplementedError

-    def post_transform(self, *z_list, train=False) -> Iterable[np.ndarray]:
+    def post_transform(self, z_list, train=False) -> Iterable[np.ndarray]:
         """
         Some models require a final linear CCA after model training.

         :param z_list: a list of all of the latent space embeddings for each view
         :param train: if the train flag is True this fits a new post transformation
         """
-        return z_list
\ No newline at end of file
+        return z_list
diff --git a/cca_zoo/deepmodels/dcca.py b/cca_zoo/deepmodels/dcca.py
index 05afb8ce..c61cc86d 100644
--- a/cca_zoo/deepmodels/dcca.py
+++ b/cca_zoo/deepmodels/dcca.py
@@ -55,7 +55,7 @@ def loss(self, *args):
         z = self(*args)
         return self.objective.loss(*z)

-    def post_transform(self, *z_list, train=False):
+    def post_transform(self, z_list, train=False):
         if train:
             self.cca = MCCA(latent_dims=self.latent_dims)
             z_list = self.cca.fit_transform(z_list)
diff --git a/cca_zoo/deepmodels/dtcca.py b/cca_zoo/deepmodels/dtcca.py
index 85209061..419d5760 100644
--- a/cca_zoo/deepmodels/dtcca.py
+++ b/cca_zoo/deepmodels/dtcca.py
@@ -37,11 +37,10 @@ def __init__(
             eps=eps,
         )

-    def post_transform(self, *z_list, train=False) -> Iterable[np.ndarray]:
+    def post_transform(self, z_list, train=False) -> Iterable[np.ndarray]:
         if train:
             self.cca = TCCA(latent_dims=self.latent_dims)
-            self.cca.fit(z_list)
-            z_list = self.cca.transform(z_list)
+            z_list = self.cca.fit_transform(z_list)
         else:
             z_list = self.cca.transform(z_list)
         return z_list
diff --git a/cca_zoo/deepmodels/splitae.py b/cca_zoo/deepmodels/splitae.py
index 707ab1db..1d66dd8e 100644
--- a/cca_zoo/deepmodels/splitae.py
+++ b/cca_zoo/deepmodels/splitae.py
@@ -30,7 +30,7 @@ def __init__(self, latent_dims: int, encoder: BaseEncoder = Encoder, decoders=No

     def forward(self, *args):
         z = self.encoder(args[0])
-        return z
+        return [z]

     def recon(self, *args):
         """
@@ -40,7 +40,7 @@ def recon(self, *args):
         z = self(*args)
-        return self._decode(z)
+        return self._decode(*z)

-    def _decode(self, z):
+    def _decode(self, *z):
         """
         This method is used to decode from the latent space to the best prediction of the original views

@@ -48,12 +48,12 @@ def _decode(self, *z):
         """
         recon = []
         for i, decoder in enumerate(self.decoders):
-            recon.append(decoder(z))
+            recon.append(decoder(*z))
         return tuple(recon)

     def loss(self, *args):
         z = self(*args)
-        recon = self._decode(z)
+        recon = self._decode(*z)
         recon_loss = self.recon_loss(args, recon)
         return recon_loss
diff --git a/examples/plot_dcca.py b/examples/plot_dcca.py
index a77f415c..4cd6ef3f 100644
--- a/examples/plot_dcca.py
+++ b/examples/plot_dcca.py
@@ -21,7 +21,25 @@
     DCCA_SDL,
     BarlowTwins,
 )
-from examples.utils import plot_latent_label
+
+
+def plot_latent_label(model, dataloader, num_batches=100):
+    fig, ax = plt.subplots(ncols=model.latent_dims)
+    for j in range(model.latent_dims):
+        ax[j].set_title(f'Dimension {j}')
+        ax[j].set_xlabel('View 1')
+        ax[j].set_ylabel('View 2')
+    for i, (data, label) in enumerate(dataloader):
+        z = model(*data)
+        zx, zy = z
+        zx = zx.to('cpu').detach().numpy()
+        zy = zy.to('cpu').detach().numpy()
+        for j in range(model.latent_dims):
+            ax[j].scatter(zx[:, j], zy[:, j], c=label.numpy(), cmap='tab10')
+        if i > num_batches:
+            plt.colorbar()
+            break
+

 n_train = 500
 n_val = 100
diff --git a/examples/plot_dvcca.py b/examples/plot_dvcca.py
index 83474147..c7c04ece 100644
--- a/examples/plot_dvcca.py
+++ b/examples/plot_dvcca.py
@@ -19,7 +19,26 @@
     DCCAE,
     DVCCA,
 )
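+# The plotting helper below is inlined from examples/utils.py (which this patch
+# deletes) so that the gallery example remains self-contained.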
-from examples.utils import plot_reconstruction + + +def plot_reconstruction(model, dataset, idx): + (x, y), _ = dataset[idx] + recon_x, recon_y = model.recon(x, y) + if isinstance(recon_x, list): + recon_x = recon_x[0] + recon_y = recon_y[0] + recon_x = recon_x.detach().numpy() + recon_y = recon_y.detach().numpy() + fig, ax = plt.subplots(ncols=4) + ax[0].set_title('Original View 1') + ax[1].set_title('Original View 2') + ax[2].set_title('Reconstruction View 1') + ax[3].set_title('Reconstruction View 2') + ax[0].imshow(x.detach().numpy().reshape((28, 28))) + ax[1].imshow(y.detach().numpy().reshape((28, 28))) + ax[2].imshow(recon_x.reshape((28, 28))) + ax[3].imshow(recon_y.reshape((28, 28))) + n_train = 500 n_val = 100 diff --git a/examples/utils.py b/examples/utils.py deleted file mode 100644 index 1fe79863..00000000 --- a/examples/utils.py +++ /dev/null @@ -1,38 +0,0 @@ -import matplotlib.pyplot as plt - - -def plot_reconstruction(model, dataset, idx): - (x, y), _ = dataset[idx] - recon_x, recon_y = model.recon(x, y) - if isinstance(recon_x, list): - recon_x = recon_x[0] - recon_y = recon_y[0] - recon_x = recon_x.detach().numpy() - recon_y = recon_y.detach().numpy() - fig, ax = plt.subplots(ncols=4) - ax[0].set_title('Original View 1') - ax[1].set_title('Original View 2') - ax[2].set_title('Reconstruction View 1') - ax[3].set_title('Reconstruction View 2') - ax[0].imshow(x.detach().numpy().reshape((28, 28))) - ax[1].imshow(y.detach().numpy().reshape((28, 28))) - ax[2].imshow(recon_x.reshape((28, 28))) - ax[3].imshow(recon_y.reshape((28, 28))) - - -def plot_latent_label(model, dataloader, num_batches=100): - fig, ax = plt.subplots(ncols=model.latent_dims) - for j in range(model.latent_dims): - ax[j].set_title(f'Dimension {j}') - ax[j].set_xlabel('View 1') - ax[j].set_ylabel('View 2') - for i, (data, label) in enumerate(dataloader): - z = model(*data) - zx, zy = z - zx = zx.to('cpu').detach().numpy() - zy = zy.to('cpu').detach().numpy() - for j in range(model.latent_dims): - ax[j].scatter(zx[:, j], zy[:, j], c=label.numpy(), cmap='tab10') - if i > num_batches: - plt.colorbar() - break From 33e96a4b4b2ae7bef0081608ba7b5f4a5a8aaf03 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 17 Nov 2021 22:41:34 +0000 Subject: [PATCH 36/45] Touching up examples --- examples/plot_dvcca.py | 2 +- examples/plot_hyperparameter_selection.py | 35 ++++++++++------------- examples/plot_many_views.py | 32 ++++++++++----------- 3 files changed, 31 insertions(+), 38 deletions(-) diff --git a/examples/plot_dvcca.py b/examples/plot_dvcca.py index c7c04ece..62df7811 100644 --- a/examples/plot_dvcca.py +++ b/examples/plot_dvcca.py @@ -50,7 +50,7 @@ def plot_reconstruction(model, dataset, idx): # The number of latent dimensions across models latent_dims = 2 # number of epochs for deep models -epochs = 20 +epochs = 50 encoder_1 = architectures.Encoder( latent_dims=latent_dims, feature_size=784, variational=True diff --git a/examples/plot_hyperparameter_selection.py b/examples/plot_hyperparameter_selection.py index 1bfb4714..8ec239be 100644 --- a/examples/plot_hyperparameter_selection.py +++ b/examples/plot_hyperparameter_selection.py @@ -26,20 +26,17 @@ n, view_features=[p, q], latent_dims=latent_dims, correlation=[0.9] ) -""" -Grid Search --------------------- - -Hyperparameter selection works in a very similar way to in scikit-learn where the main difference is in how we enter the parameter grid. -We form a parameter grid with the search space for each view for each parameter. 
-This search space must be entered as a list but can be any of
-- a single value (as in "kernel") where this value will be used for each view
-- a list for each view
-- a mixture of a single value for one view and a distribution or list for the other
-"""
+# %%
+# Grid Search
+# ^^^^^^^^^^^^^
+# Hyperparameter selection works in a very similar way to scikit-learn, where the main difference is in how we enter the parameter grid.
+# We form a parameter grid with the search space for each view for each parameter.
+# This search space must be entered as a list but can be any of
+# - a single value (as in "kernel") where this value will be used for each view
+# - a list for each view
+# - a mixture of a single value for one view and a distribution or list for the other

 # %%
-# Linear Grid Search
 param_grid = {"kernel": ["poly"], "c": [[1e-1], [1e-1, 2e-1]], "degree": [[2], [2, 3]]}
 kernel_reg = (
     GridSearchCV(
     )
     .fit([X, Y])
 )
+print(pd.DataFrame(kernel_reg.cv_results_))

-"""
-Randomized Search
--------------------
-
-With Randomized Search we can additionally use distributions from scikit-learn to define the parameter search space
-"""
+# %%
+# Randomized Search
+# ^^^^^^^^^^^^^^^^^^^
+
+# With Randomized Search we can additionally use distributions from scikit-learn to define the parameter search space

 # %%
-# Linear Randomized Search
 param_grid = {"kernel": ["poly"], "c": [loguniform(1e-1, 2e-1), [1e-1]], "degree": [[2], [2, 3]]}
 kernel_reg = (
     RandomizedSearchCV(
     )
     .fit([X, Y])
 )
-
-# %%
 print(pd.DataFrame(kernel_reg.cv_results_))
diff --git a/examples/plot_many_views.py b/examples/plot_many_views.py
index 88a28400..dff455df 100644
--- a/examples/plot_many_views.py
+++ b/examples/plot_many_views.py
@@ -23,10 +23,9 @@
     n, view_features=[p, q, r], latent_dims=latent_dims, correlation=[0.9]
 )

-"""
-Eigendecomposition-Based Methods
----------------------------------
-"""
+# %%
+# Eigendecomposition-Based Methods
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

 # %%
 mcca = MCCA(latent_dims=latent_dims).fit((X, Y, Z)).score((X, Y, Z))

 # %%
 gcca = GCCA(latent_dims=latent_dims).fit((X, Y, Z)).score((X, Y, Z))

-"""
-We can also use kernel versions of these methods
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-"""
+# %%
+# We can also use kernel versions of these methods
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

 # %%
 kcca = KCCA(latent_dims=latent_dims).fit((X, Y, Z)).score((X, Y, Z))

 # %%
 kgcca = KGCCA(latent_dims=latent_dims).fit((X, Y, Z)).score((X, Y, Z))

-"""
-Higher order correlation methods
----------------------------------
-"""
+# %%
+# Higher order correlation methods
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+

 # %%
 # Tensor CCA finds higher order correlations so scores are not comparable (but TCCA is equivalent for 2 views)
 tcca = TCCA(latent_dims=latent_dims).fit((X, Y, Z)).score((X, Y, Z))

 # %%
 ktcca = KTCCA(latent_dims=latent_dims).fit((X, Y, Z)).score((X, Y, Z))

-"""
-Iterative Methods
----------------------
+# %%
+# Iterative Methods
+# ^^^^^^^^^^^^^^^^^^^^^^
+#
+# Most of the iterative methods can also use multiple views e.g.

-Most of the iterative methods can also use multiple views e.g.
-"""
 pmd = PMD(latent_dims=latent_dims, c=1).fit((X, Y, Z)).score((X, Y, Z))

From 5db0190a5310bb60fc6d017ab0bef4f5d6fd8d71 Mon Sep 17 00:00:00 2001
From: James Chapman
Date: Wed, 17 Nov 2021 22:48:05 +0000
Subject: [PATCH 37/45] Touching up examples

---
 docs/source/documentation/user_guide.rst | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/docs/source/documentation/user_guide.rst b/docs/source/documentation/user_guide.rst
index bac37bdc..baac3a44 100644
--- a/docs/source/documentation/user_guide.rst
+++ b/docs/source/documentation/user_guide.rst
@@ -1,8 +1,6 @@
 User Guide
 ===========

-
-
 Model Fit
 ----------

@@ -79,6 +77,16 @@ In applications of cca, we are often interested in the model weights. These can

     view_1_weights=ridge.weights[0]
     view_2_weights=ridge.weights[1]

+Model Loadings
+-----------------
+
+Similarly, we can access the loadings for a given set of samples:
+
+.. sourcecode:: python
+
+    view_1_loadings, view_2_loadings=ridge.get_loadings([train_view_1, train_view_2])
+
+
 Deep Models
 ------------

From 435db16c536b0f13024e4526f31fc9bc162214ac Mon Sep 17 00:00:00 2001
From: James Chapman
Date: Wed, 17 Nov 2021 23:07:38 +0000
Subject: [PATCH 38/45] Touching up examples

---
 docs/source/documentation/maths.rst | 71 +++++++++++++++++++++++++++++
 docs/source/index.rst               |  1 +
 2 files changed, 72 insertions(+)
 create mode 100644 docs/source/documentation/maths.rst

diff --git a/docs/source/documentation/maths.rst b/docs/source/documentation/maths.rst
new file mode 100644
index 00000000..1277cdbb
--- /dev/null
+++ b/docs/source/documentation/maths.rst
@@ -0,0 +1,71 @@
+Mathematical Foundations
+===========================
+
+Canonical Correlation Analysis (CCA) and Partial Least Squares (PLS) models
+are effective ways of finding associations between multiple views of data.
+
+PCA
+----
+
+It is helpful to start off by formulating PCA in its mathematical form.
+The first principal component can be written as the solution to the optimisation problem:
+
+.. math::
+
+    w_{opt}=\underset{w}{\mathrm{argmax}}\{ w^TX^TXw \}
+
+    \text{subject to:}
+
+    w^Tw=1
+
+That is, :math:`w` is the leading singular vector (equivalently, the leading eigenvector) of the covariance matrix :math:`X^TX`.
+
+PLS
+----
+
+Now consider two data matrices with the same number of samples :math:`X_1` and :math:`X_2`.
+It is tempting to write a slightly different optimisation problem:
+
+.. math::
+
+    w_{opt}=\underset{w_1,w_2}{\mathrm{argmax}}\{ w_1^TX_1^TX_2w_2 \}
+
+    \text{subject to:}
+
+    w_1^Tw_1=1
+
+    w_2^Tw_2=1
+
+This is solved by the leading left and right singular vectors of the cross-covariance matrix :math:`X_1^TX_2`.
+
+
+CCA
+----
+
+To arrive at Canonical Correlation Analysis, we instead constrain the variance of each projection rather than the norm of each weight vector:
+
+.. math::
+
+    w_{opt}=\underset{w_1,w_2}{\mathrm{argmax}}\{ w_1^TX_1^TX_2w_2 \}
+
+    \text{subject to:}
+
+    w_1^TX_1^TX_1w_1=1
+
+    w_2^TX_2^TX_2w_2=1
+
+
+Deep CCA
+----------
+
+To arrive at Deep CCA, we replace the linear projections with the outputs of neural networks :math:`f`:
+
+.. math::
+
+    f_{opt}=\underset{f}{\mathrm{argmax}}\{ f(X_1)^Tf(X_2) \}
+
+    \text{subject to:}
+
+    f(X_1)^Tf(X_1)=1
+
+    f(X_2)^Tf(X_2)=1
\ No newline at end of file
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 84857c44..dd47d1e2 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -12,6 +12,7 @@ Documentation

    documentation/install
    documentation/getting_started
+   documentation/maths
    documentation/user_guide

    auto_examples/index

From 07e264060a8a1d142f4c8077ea55b30fd15f09e0 Mon Sep 17 00:00:00 2001
From: James Chapman
Date: Thu, 18 Nov 2021 00:00:00 +0000
Subject: [PATCH 39/45] Touching up examples

---
 examples/plot_hyperparameter_selection.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/examples/plot_hyperparameter_selection.py b/examples/plot_hyperparameter_selection.py
index 8ec239be..1aad1b66 100644
--- a/examples/plot_hyperparameter_selection.py
+++ b/examples/plot_hyperparameter_selection.py
@@ -49,11 +49,9 @@
 # %%
 # Randomized Search
 # ^^^^^^^^^^^^^^^^^^^
-
 # With Randomized Search we can additionally use distributions from scikit-learn to define the parameter search space

 # %%
-# Randomized Search
 param_grid = {"kernel": ["poly"], "c": [loguniform(1e-1, 2e-1), [1e-1]], "degree": [[2], [2, 3]]}
 kernel_reg = (
     RandomizedSearchCV(

From 7907165373bb48678f5c1d9b612b10d8070fd333 Mon Sep 17 00:00:00 2001
From: James Chapman
Date: Sun, 28 Nov 2021 12:50:33 +0000
Subject: [PATCH 40/45] Deflation options missing from some methods

---
 cca_zoo/data/simulated.py   |  62 +++++++-------
 cca_zoo/models/iterative.py | 167 +++++++++++++++++++-----------------
 examples/plot_dcca.py       |   2 +-
 3 files changed, 120 insertions(+), 111 deletions(-)

diff --git a/cca_zoo/data/simulated.py b/cca_zoo/data/simulated.py
index e195e3ac..eb954d91 100644
--- a/cca_zoo/data/simulated.py
+++ b/cca_zoo/data/simulated.py
@@ -10,16 +10,16 @@


 def generate_covariance_data(
-    n: int,
-    view_features: List[int],
-    latent_dims: int = 1,
-    view_sparsity: List[Union[int, float]] = None,
-    correlation: Union[List[float], float] = 1,
-    structure: Union[str, List[str]] = None,
-    sigma: List[float] = None,
-    decay: float = 0.5,
-    positive=None,
-    random_state: Union[int, np.random.RandomState] = None,
+        n: int,
+        view_features: List[int],
+        latent_dims: int = 1,
+        view_sparsity: List[Union[int, float]] = None,
+        correlation: Union[List[float], float] = 1,
+        structure: Union[str, List[str]] = None,
+        sigma: Union[List[float], float] = None,
+        decay: float = 0.5,
+        positive=None,
+        random_state: Union[int, np.random.RandomState] = None,
 ):
     """
     Function to generate CCA dataset with defined population correlations

@@ -29,7 +29,7 @@
     :param view_features: number of features in each view
     :param latent_dims: number of latent dimensions
     :param correlation: correlation either as list with element for each latent dimension or as float which is scaled by 'decay'
-    :param structure: within view covariance structure
+    :param structure: within view covariance structure ('identity','gaussian','toeplitz','random')
     :param sigma: gaussian sigma
     :param decay: ratio of second signal to first signal
     :return: tuple of numpy arrays: view_1, view_2, true weights from view 1, true weights from view 2, overall covariance structure

@@ -58,7 +58,7 @@
     covs = []
     true_features = []
     for view_p, sparsity, view_structure, view_positive, view_sigma in zip(
-        view_features, view_sparsity, structure, positive, sigma
+            view_features, view_sparsity, structure, positive, sigma
     ):
         #
Covariance Bit if view_structure == "identity": @@ -86,12 +86,9 @@ def generate_covariance_data( * latent_dims, axis=0, ).T + mask = mask.flatten() random_state.shuffle(mask) - while ( - np.sum(np.unique(mask, axis=1, return_counts=True)[1] > 1) > 0 - or np.sum(np.sum(mask, axis=0) == 0) > 0 - ): - random_state.shuffle(mask) + mask = mask.reshape(weights.shape) weights = weights * mask if view_positive: weights[weights < 0] = 0 @@ -113,12 +110,12 @@ def generate_covariance_data( # Cross Bit cross += covs[i] @ A @ covs[j] cov[ - splits[i] : splits[i] + view_features[i], - splits[j] : splits[j] + view_features[j], + splits[i]: splits[i] + view_features[i], + splits[j]: splits[j] + view_features[j], ] = cross cov[ - splits[j] : splits[j] + view_features[j], - splits[i] : splits[i] + view_features[i], + splits[j]: splits[j] + view_features[j], + splits[i]: splits[i] + view_features[i], ] = cross.T X = np.zeros((n, sum(view_features))) @@ -133,12 +130,12 @@ def generate_covariance_data( def generate_simple_data( - n: int, - view_features: List[int], - view_sparsity: List[int] = None, - eps: float = 0, - transform=True, - random_state=None, + n: int, + view_features: List[int], + view_sparsity: List[Union[int, float]] = None, + eps: float = 0, + transform=True, + random_state=None, ): """ Simple latent variable model to generate data with one latent factor @@ -165,9 +162,8 @@ def generate_simple_data( ) for p, sparsity in zip(view_features, view_sparsity): weights = random_state.randn(p, 1) - if sparsity > 0: - if sparsity < 1: - sparsity = np.ceil(sparsity * p).astype("int") + if sparsity <= 1: + sparsity = np.ceil(sparsity * p).astype("int") weights[random_state.choice(np.arange(p), p - sparsity, replace=False)] = 0 gaussian_x = random_state.randn(n, p) * eps view = np.outer(z, weights) @@ -200,9 +196,9 @@ def _gaussian(x, mu, sig, dn): :param dn: """ return ( - np.exp(-np.power(x - mu, 2.0) / (2 * np.power(sig, 2.0))) - * dn - / (np.sqrt(2 * np.pi) * sig) + np.exp(-np.power(x - mu, 2.0) / (2 * np.power(sig, 2.0))) + * dn + / (np.sqrt(2 * np.pi) * sig) ) diff --git a/cca_zoo/models/iterative.py b/cca_zoo/models/iterative.py index bb89d060..b686acd5 100644 --- a/cca_zoo/models/iterative.py +++ b/cca_zoo/models/iterative.py @@ -159,15 +159,16 @@ class PLS_ALS(_Iterative): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - max_iter: int = 100, - initialization: str = "unregularized", - tol: float = 1e-9, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + deflation="cca", + max_iter: int = 100, + initialization: str = "unregularized", + tol: float = 1e-9, ): """ Constructor for PLS @@ -330,17 +331,18 @@ class CCA_ALS(ElasticCCA): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - max_iter: int = 100, - initialization: str = "random", - tol: float = 1e-9, - stochastic=True, - positive: Union[Iterable[bool], bool] = None, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + deflation="cca", + max_iter: int = 100, + initialization: str = "random", + tol: float = 1e-9, + stochastic=True, + positive: Union[Iterable[bool], bool] = None, ): """ Constructor for CCA_ALS @@ -368,6 +370,7 @@ def __init__( scale=scale, positive=positive, random_state=random_state, + deflation=deflation, c=1e-5, maxvar=False, ) @@ -402,19 +405,20 @@ class 
SCCA(ElasticCCA): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - c: Union[Iterable[float], float] = None, - max_iter: int = 100, - maxvar: bool = False, - initialization: str = "unregularized", - tol: float = 1e-9, - stochastic=False, - positive: Union[Iterable[bool], bool] = None, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + deflation="cca", + c: Union[Iterable[float], float] = None, + max_iter: int = 100, + maxvar: bool = False, + initialization: str = "unregularized", + tol: float = 1e-9, + stochastic=False, + positive: Union[Iterable[bool], bool] = None, ): """ Constructor for SCCA @@ -446,6 +450,7 @@ def __init__( stochastic=stochastic, positive=positive, random_state=random_state, + deflation=deflation, ) @@ -480,17 +485,18 @@ class PMD(_Iterative): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - c: Union[Iterable[float], float] = None, - max_iter: int = 100, - initialization: str = "unregularized", - tol: float = 1e-9, - positive: Union[Iterable[bool], bool] = None, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + deflation="cca", + c: Union[Iterable[float], float] = None, + max_iter: int = 100, + initialization: str = "unregularized", + tol: float = 1e-9, + positive: Union[Iterable[bool], bool] = None, ): """ Constructor for PMD @@ -517,6 +523,7 @@ def __init__( initialization=initialization, tol=tol, random_state=random_state, + deflation=deflation, ) def _set_loop_params(self): @@ -559,16 +566,17 @@ class ParkhomenkoCCA(_Iterative): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - c: Union[Iterable[float], float] = None, - max_iter: int = 100, - initialization: str = "unregularized", - tol: float = 1e-9, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + deflation="cca", + c: Union[Iterable[float], float] = None, + max_iter: int = 100, + initialization: str = "unregularized", + tol: float = 1e-9, ): """ Constructor for ParkhomenkoCCA @@ -593,6 +601,7 @@ def __init__( initialization=initialization, tol=tol, random_state=random_state, + deflation=deflation, ) def _set_loop_params(self): @@ -634,19 +643,20 @@ class SCCA_ADMM(_Iterative): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - c: Union[Iterable[float], float] = None, - mu: Union[Iterable[float], float] = None, - lam: Union[Iterable[float], float] = None, - eta: Union[Iterable[float], float] = None, - max_iter: int = 100, - initialization: str = "unregularized", - tol: float = 1e-9, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + deflation="cca", + c: Union[Iterable[float], float] = None, + mu: Union[Iterable[float], float] = None, + lam: Union[Iterable[float], float] = None, + eta: Union[Iterable[float], float] = None, + max_iter: int = 100, + initialization: str = "unregularized", + tol: float = 1e-9, ): """ Constructor for SCCA_ADMM @@ -677,6 +687,7 @@ def __init__( initialization=initialization, tol=tol, random_state=random_state, + deflation=deflation, ) def _set_loop_params(self): @@ -725,16 +736,17 @@ def __init__( self, latent_dims: int = 1, scale: bool = True, 
- centre=True, - copy_data=True, - max_iter: int = 100, - initialization: str = "uniform", - tol: float = 1e-9, - regularisation="l0", - c: Union[Iterable[Union[float, int]], Union[float, int]] = None, - rank=1, - positive: Union[Iterable[bool], bool] = None, - random_state=None, + centre=True, + copy_data=True, + max_iter: int = 100, + initialization: str = "uniform", + tol: float = 1e-9, + regularisation="l0", + c: Union[Iterable[Union[float, int]], Union[float, int]] = None, + rank=1, + positive: Union[Iterable[bool], bool] = None, + random_state=None, + deflation="cca", ): """ @@ -760,6 +772,7 @@ def __init__( initialization=initialization, tol=tol, random_state=random_state, + deflation=deflation, ) self.c = c self.regularisation = regularisation diff --git a/examples/plot_dcca.py b/examples/plot_dcca.py index 4cd6ef3f..03448335 100644 --- a/examples/plot_dcca.py +++ b/examples/plot_dcca.py @@ -46,7 +46,7 @@ def plot_latent_label(model, dataloader, num_batches=100): train_dataset = Split_MNIST_Dataset(mnist_type="MNIST", train=True) val_dataset = Subset(train_dataset, np.arange(n_train, n_train + n_val)) train_dataset = Subset(train_dataset, np.arange(n_train)) -train_loader, val_loader = get_dataloaders(train_dataset, val_dataset) +train_loader, val_loader = get_dataloaders(train_dataset, val_dataset, batch_size=128) # The number of latent dimensions across models latent_dims = 2 From e394e92e47183ad2c6ff84468c720b26853728de Mon Sep 17 00:00:00 2001 From: James Chapman Date: Mon, 29 Nov 2021 12:18:16 +0000 Subject: [PATCH 41/45] updating default tolerance for iterative methods. --- cca_zoo/deepmodels/trainers.py | 15 ++- cca_zoo/models/innerloop.py | 171 +++++++++++++++++---------------- cca_zoo/models/iterative.py | 10 +- cca_zoo/test/test_models.py | 94 +++++++++--------- 4 files changed, 150 insertions(+), 140 deletions(-) diff --git a/cca_zoo/deepmodels/trainers.py b/cca_zoo/deepmodels/trainers.py index 754ffbc0..a1e7c04f 100644 --- a/cca_zoo/deepmodels/trainers.py +++ b/cca_zoo/deepmodels/trainers.py @@ -68,19 +68,18 @@ def on_train_epoch_end(self, unused: Optional = None) -> None: self.log("train corr", score) def on_validation_epoch_end(self, unused: Optional = None) -> None: - score = self.score(self.trainer.val_dataloaders[0], train=True).sum() + score = self.score(self.trainer.val_dataloaders[0]).sum() self.log("val corr", score) def correlations( self, loader: torch.utils.data.DataLoader, - train: bool = False, + train=False, ): """ :param loader: a dataloader that matches the structure of that used for training - :param train: if True and the model requires a final linear CCA this solves and stores the linear CCA - :return: numpy array containing correlations between each pair of views for each dimension (#views*#views*#latent_dimensions) + :param train: whether to fit final linear transformation """ transformed_views = self.transform(loader, train=train) if len(transformed_views) < 2: @@ -96,12 +95,12 @@ def correlations( def transform( self, loader: torch.utils.data.DataLoader, - train: bool = False, + train=False, ): """ :param loader: a dataloader that matches the structure of that used for training - :param train: if True and the model requires a final linear CCA this solves and stores the linear CCA + :param train: whether to fit final linear transformation :return: transformed views """ with torch.no_grad(): @@ -121,12 +120,12 @@ def transform( def score( self, loader: torch.utils.data.DataLoader, - train: bool = False, + train=False, ): """ :param loader: a 
dataloader that matches the structure of that used for training - :param train: if True and the model requires a final linear CCA this solves and stores the linear CCA + :param train: whether to fit final linear transformation :return: by default returns the average pairwise correlation in each dimension (for 2 views just the correlation) """ pair_corrs = self.correlations(loader, train=train) diff --git a/cca_zoo/models/innerloop.py b/cca_zoo/models/innerloop.py index 13ca70ed..743278b8 100644 --- a/cca_zoo/models/innerloop.py +++ b/cca_zoo/models/innerloop.py @@ -20,11 +20,11 @@ class _InnerLoop: def __init__( - self, - max_iter: int = 100, - tol: float = 1e-5, - initialization: str = "unregularized", - random_state=None, + self, + max_iter: int = 100, + tol: float = 1e-9, + initialization: str = "unregularized", + random_state=None, ): """ :param max_iter: maximum number of iterations to perform if tol is not reached @@ -45,7 +45,7 @@ def _check_params(self): def _initialize(self): if self.initialization == "random": self.scores = np.array( - [self.random_state.randn(view.shape[0], 1) for view in self.views] + [self.random_state.normal(0, 1, size=(view.shape[0], 1)) for view in self.views] ) elif self.initialization == "uniform": self.scores = np.array([np.ones((view.shape[0], 1)) for view in self.views]) @@ -56,15 +56,17 @@ def _initialize(self): random_state=self.random_state, tol=self.tol, ) - ._fit(*self.views) - .scores + ._fit(*self.views) + .scores ) + elif callable(self.initialization): + self.scores = next(self.initialization()) else: raise ValueError("initialize must be random, uniform or unregularized") self.scores = ( - self.scores - * np.sqrt(self.n - 1) - / np.linalg.norm(self.scores, axis=1)[:, np.newaxis] + self.scores + * np.sqrt(self.n - 1) + / np.linalg.norm(self.scores, axis=1)[:, np.newaxis] ) self.weights = [ self.random_state.randn(view.shape[1], 1) for view in self.views @@ -119,11 +121,11 @@ def _objective(self) -> int: class PLSInnerLoop(_InnerLoop): def __init__( - self, - max_iter: int = 100, - tol=1e-5, - initialization: str = "unregularized", - random_state=None, + self, + max_iter: int = 100, + tol=1e-9, + initialization: str = "unregularized", + random_state=None, ): super().__init__( max_iter=max_iter, @@ -156,7 +158,7 @@ def _update_view(self, view_index: int): targets = np.ma.array(self.scores, mask=False) targets.mask[view_index] = True self.weights[view_index] = ( - self.views[view_index].T @ targets.sum(axis=0).filled() + self.views[view_index].T @ targets.sum(axis=0).filled() ) self.weights[view_index] /= np.linalg.norm(self.weights[view_index]) self.scores[view_index] = self.views[view_index] @ self.weights[view_index] @@ -164,8 +166,8 @@ def _update_view(self, view_index: int): def _early_stop(self) -> bool: # Some kind of early stopping if all( - _cosine_similarity(self.scores[n], self.old_scores[n]) > (1 - self.tol) - for n, view in enumerate(self.scores) + _cosine_similarity(self.scores[n], self.old_scores[n]) > (1 - self.tol) + for n, view in enumerate(self.scores) ): return True else: @@ -174,13 +176,13 @@ def _early_stop(self) -> bool: class PMDInnerLoop(PLSInnerLoop): def __init__( - self, - max_iter: int = 100, - tol=1e-5, - initialization: str = "unregularized", - c=None, - positive=None, - random_state=None, + self, + max_iter: int = 100, + tol=1e-9, + initialization: str = "unregularized", + c=None, + positive=None, + random_state=None, ): super().__init__( max_iter=max_iter, @@ -222,12 +224,13 @@ def _update_view(self, view_index: 
int): targets = np.ma.array(self.scores, mask=False) targets.mask[view_index] = True self.weights[view_index] = ( - self.views[view_index].T @ targets.sum(axis=0).filled() + self.views[view_index].T @ targets.sum(axis=0).filled() ) self.weights[view_index] = _delta_search( self.weights[view_index], self.c[view_index], positive=self.positive[view_index], + tol=self.tol ) _check_converged_weights(self.weights[view_index], view_index) self.scores[view_index] = self.views[view_index] @ self.weights[view_index] @@ -235,12 +238,12 @@ def _update_view(self, view_index: int): class ParkhomenkoInnerLoop(PLSInnerLoop): def __init__( - self, - max_iter: int = 100, - tol=1e-5, - initialization: str = "unregularized", - c=None, - random_state=None, + self, + max_iter: int = 100, + tol=1e-9, + initialization: str = "unregularized", + c=None, + random_state=None, ): super().__init__( max_iter=max_iter, @@ -274,16 +277,16 @@ def _update_view(self, view_index: int): class ElasticInnerLoop(PLSInnerLoop): def __init__( - self, - max_iter: int = 100, - tol=1e-5, - initialization: str = "unregularized", - c=None, - l1_ratio=None, - maxvar=True, - stochastic=True, - positive=None, - random_state=None, + self, + max_iter: int = 100, + tol=1e-9, + initialization: str = "unregularized", + c=None, + l1_ratio=None, + maxvar=True, + stochastic=True, + positive=None, + random_state=None, ): super().__init__( max_iter=max_iter, @@ -350,8 +353,8 @@ def _update_view(self, view_index: int): if not self.maxvar: _check_converged_weights(self.weights[view_index], view_index) self.weights[view_index] = self.weights[view_index] / ( - np.linalg.norm(self.views[view_index] @ self.weights[view_index]) - / np.sqrt(self.n) + np.linalg.norm(self.views[view_index] @ self.weights[view_index]) + / np.sqrt(self.n) ) self.scores[view_index] = self.views[view_index] @ self.weights[view_index] @@ -388,15 +391,15 @@ def _early_stop(self) -> bool: class ADMMInnerLoop(ElasticInnerLoop): def __init__( - self, - max_iter: int = 100, - tol=1e-5, - initialization: str = "unregularized", - mu=None, - lam=None, - c=None, - eta=None, - random_state=None, + self, + max_iter: int = 100, + tol=1e-9, + initialization: str = "unregularized", + mu=None, + lam=None, + c=None, + eta=None, + random_state=None, ): super().__init__( max_iter=max_iter, @@ -455,9 +458,9 @@ def _update_view(self, view_index: int): / lam * self.views[view_index].T @ ( - self.views[view_index] @ self.weights[view_index] - - self.z[view_index] - + self.eta[view_index] + self.views[view_index] @ self.weights[view_index] + - self.z[view_index] + + self.eta[view_index] ), mu, gradient, @@ -473,9 +476,9 @@ def _update_view(self, view_index: int): self.views[view_index] @ self.weights[view_index] + self.eta[view_index] ) self.eta[view_index] = ( - self.eta[view_index] - + self.views[view_index] @ self.weights[view_index] - - self.z[view_index] + self.eta[view_index] + + self.views[view_index] @ self.weights[view_index] + - self.z[view_index] ) norm_eta.append(np.linalg.norm(self.eta[view_index])) norm_proj.append( @@ -507,15 +510,15 @@ def _prox_lam_g(self, x): class SpanCCAInnerLoop(_InnerLoop): def __init__( - self, - max_iter: int = 100, - tol=1e-5, - initialization: str = "unregularized", - c=None, - regularisation="l0", - rank=1, - random_state=None, - positive=False, + self, + max_iter: int = 100, + tol=1e-9, + initialization: str = "unregularized", + c=None, + regularisation="l0", + rank=1, + random_state=None, + positive=False, ): super().__init__( max_iter=max_iter, @@ -568,15 
+571,15 @@ def _inner_iteration(self): class SWCCAInnerLoop(PLSInnerLoop): def __init__( - self, - max_iter: int = 100, - tol=1e-20, - initialization: str = "unregularized", - regularisation="l0", - c=None, - sample_support: int = None, - random_state=None, - positive=False, + self, + max_iter: int = 100, + tol=1e-9, + initialization: str = "unregularized", + regularisation="l0", + c=None, + sample_support: int = None, + random_state=None, + positive=False, ): super().__init__( max_iter=max_iter, @@ -610,8 +613,8 @@ def _update_view(self, view_index: int): targets = np.ma.array(self.scores, mask=False) targets.mask[view_index] = True self.weights[view_index] = ( - self.views[view_index] * self.sample_weights - ).T @ targets.sum(axis=0).filled() + self.views[view_index] * self.sample_weights + ).T @ targets.sum(axis=0).filled() self.weights[view_index] = self.update( self.weights[view_index], self.c[view_index], @@ -674,7 +677,7 @@ def _bin_search(current, previous, current_val, previous_val, min_, max_): return new, current, min_, max_ -def _delta_search(w, c, positive=False, init=0): +def _delta_search(w, c, positive=False, init=0, tol=1e-9): """ Searches for threshold delta such that the 1-norm of weights w is less than or equal to c :param w: weights found by one power method iteration @@ -700,12 +703,12 @@ def _delta_search(w, c, positive=False, init=0): current, previous, current_val, previous_val, min_, max_ ) previous_val = current_val - if np.abs(current_val) < 1e-5 or np.abs(max_ - min_) < 1e-30 or i == 50: + if np.abs(current_val) < tol or np.abs(max_ - min_) < tol or i == 50: converged = True return coef -def _soft_threshold(x, threshold, positive=False): +def _soft_threshold(x, threshold, positive=False, **kwargs): """ if absolute value of x less than threshold replace with zero :param x: input @@ -720,7 +723,7 @@ def _soft_threshold(x, threshold, positive=False): return u * np.sign(x) -def _support_soft_thresh(x, support, positive=False): +def _support_soft_thresh(x, support, positive=False, **kwargs): if x.shape[0] <= support or np.linalg.norm(x) == 0: return x if positive: diff --git a/cca_zoo/models/iterative.py b/cca_zoo/models/iterative.py index b686acd5..20680b16 100644 --- a/cca_zoo/models/iterative.py +++ b/cca_zoo/models/iterative.py @@ -101,7 +101,6 @@ def fit(self, views: Iterable[np.ndarray], y=None, **kwargs): self.track.append(self.loop.track) if self.track[-1]["converged"] == False: warnings.warn(f"Inner loop {k} did not converge or converged to nans") - break return self def _deflate(self, residual, score, loading): @@ -114,11 +113,13 @@ def _deflate(self, residual, score, loading): """ if self.deflation == "cca": return ( - residual - - np.outer(score, score) @ residual / np.dot(score, score).item() + residual + - np.outer(score, score) @ residual / np.dot(score, score).item() ) elif self.deflation == "pls": return residual - np.outer(score, loading) + else: + raise ValueError(f'deflation method {self.deflation} not implemented yet.') @abstractmethod def _set_loop_params(self): @@ -165,7 +166,6 @@ def __init__( centre=True, copy_data=True, random_state=None, - deflation="cca", max_iter: int = 100, initialization: str = "unregularized", tol: float = 1e-9, @@ -187,7 +187,7 @@ def __init__( scale=scale, centre=centre, copy_data=copy_data, - deflation="pls", + deflation='pls', max_iter=max_iter, initialization=initialization, tol=tol, diff --git a/cca_zoo/test/test_models.py b/cca_zoo/test/test_models.py index 51e750be..a76d8a1e 100644 --- 
a/cca_zoo/test/test_models.py +++ b/cca_zoo/test/test_models.py @@ -65,56 +65,56 @@ def test_unregularized_methods(): assert np.testing.assert_array_almost_equal(corr_cca, corr_kcca, decimal=1) is None assert np.testing.assert_array_almost_equal(corr_cca, corr_tcca, decimal=1) is None assert ( - np.testing.assert_array_almost_equal(corr_kgcca, corr_gcca, decimal=1) is None + np.testing.assert_array_almost_equal(corr_kgcca, corr_gcca, decimal=1) is None ) # Check standardized models have standard outputs assert ( - np.testing.assert_allclose( - np.linalg.norm(iter.transform((X, Y))[0], axis=0) ** 2, n, rtol=0.2 - ) - is None + np.testing.assert_allclose( + np.linalg.norm(iter.transform((X, Y))[0], axis=0) ** 2, n, rtol=0.2 + ) + is None ) assert ( - np.testing.assert_allclose( - np.linalg.norm(cca.transform((X, Y))[0], axis=0) ** 2, n, rtol=0.2 - ) - is None + np.testing.assert_allclose( + np.linalg.norm(cca.transform((X, Y))[0], axis=0) ** 2, n, rtol=0.2 + ) + is None ) assert ( - np.testing.assert_allclose( - np.linalg.norm(mcca.transform((X, Y))[0], axis=0) ** 2, n, rtol=0.2 - ) - is None + np.testing.assert_allclose( + np.linalg.norm(mcca.transform((X, Y))[0], axis=0) ** 2, n, rtol=0.2 + ) + is None ) assert ( - np.testing.assert_allclose( - np.linalg.norm(kcca.transform((X, Y))[0], axis=0) ** 2, n, rtol=0.2 - ) - is None + np.testing.assert_allclose( + np.linalg.norm(kcca.transform((X, Y))[0], axis=0) ** 2, n, rtol=0.2 + ) + is None ) assert ( - np.testing.assert_allclose( - np.linalg.norm(iter.transform((X, Y))[1], axis=0) ** 2, n, rtol=0.2 - ) - is None + np.testing.assert_allclose( + np.linalg.norm(iter.transform((X, Y))[1], axis=0) ** 2, n, rtol=0.2 + ) + is None ) assert ( - np.testing.assert_allclose( - np.linalg.norm(cca.transform((X, Y))[1], axis=0) ** 2, n, rtol=0.2 - ) - is None + np.testing.assert_allclose( + np.linalg.norm(cca.transform((X, Y))[1], axis=0) ** 2, n, rtol=0.2 + ) + is None ) assert ( - np.testing.assert_allclose( - np.linalg.norm(mcca.transform((X, Y))[1], axis=0) ** 2, n, rtol=0.2 - ) - is None + np.testing.assert_allclose( + np.linalg.norm(mcca.transform((X, Y))[1], axis=0) ** 2, n, rtol=0.2 + ) + is None ) assert ( - np.testing.assert_allclose( - np.linalg.norm(kcca.transform((X, Y))[1], axis=0) ** 2, n, rtol=0.2 - ) - is None + np.testing.assert_allclose( + np.linalg.norm(kcca.transform((X, Y))[1], axis=0) ** 2, n, rtol=0.2 + ) + is None ) @@ -182,7 +182,7 @@ def test_regularized_methods(): # Check the correlations from each unregularized method are the same assert np.testing.assert_array_almost_equal(corr_pls, corr_mcca, decimal=1) is None assert ( - np.testing.assert_array_almost_equal(corr_pls, corr_kernel, decimal=1) is None + np.testing.assert_array_almost_equal(corr_pls, corr_kernel, decimal=1) is None ) assert np.testing.assert_array_almost_equal(corr_pls, corr_rcca, decimal=1) is None @@ -266,10 +266,10 @@ def test_weighted_GCCA_methods(): K[0, 200:] = 0 unobserved_gcca = GCCA(latent_dims=latent_dims, c=[c, c]).fit((X, Y), K=K) assert ( - np.testing.assert_array_almost_equal( - corr_unweighted_gcca, corr_deweighted_gcca, decimal=1 - ) - is None + np.testing.assert_array_almost_equal( + corr_unweighted_gcca, corr_deweighted_gcca, decimal=1 + ) + is None ) @@ -300,6 +300,14 @@ def test_l0(): assert (np.abs(swcca.loop.sample_weights) > 1e-5).sum() == 5 +def test_pls(): + pls_als = PLS_ALS(latent_dims=3) + pls = PLS(latent_dims=3) + pls_als.fit((X, Y)) + pls.fit((X, Y)) + assert (np.allclose(np.abs(pls_als.weights[0]), np.abs(pls.weights[0]), 
rtol=1e-3)) + + def test_VCCA(): try: from cca_zoo.probabilisticmodels import VariationalCCA @@ -314,11 +322,11 @@ def test_VCCA(): ).fit([X, Y]) # Test that vanilla CCA and VCCA produce roughly similar latent space assert ( - np.corrcoef( - cca.transform([X, Y])[1].T, - vcca.posterior_samples["z"].mean(axis=0)[:, 0], - )[0, 1] - > 0.9 + np.corrcoef( + cca.transform([X, Y])[1].T, + vcca.posterior_samples["z"].mean(axis=0)[:, 0], + )[0, 1] + > 0.9 ) except: # some might not have access to jax/numpyro so leave this as an optional test locally. From 271c91be3d0566f0ef91d107a0f3a1b579d2a093 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Mon, 29 Nov 2021 15:18:02 +0000 Subject: [PATCH 42/45] updating default tolerance for iterative methods. --- cca_zoo/deepmodels/trainers.py | 10 ++++++++-- cca_zoo/test/test_deepmodels.py | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/cca_zoo/deepmodels/trainers.py b/cca_zoo/deepmodels/trainers.py index a1e7c04f..2192a113 100644 --- a/cca_zoo/deepmodels/trainers.py +++ b/cca_zoo/deepmodels/trainers.py @@ -25,6 +25,7 @@ def __init__( super().__init__() self.save_hyperparameters() self.model = model + self.sanity_check = True def forward(self, *args): z = self.encode(*args) @@ -68,8 +69,13 @@ def on_train_epoch_end(self, unused: Optional = None) -> None: self.log("train corr", score) def on_validation_epoch_end(self, unused: Optional = None) -> None: - score = self.score(self.trainer.val_dataloaders[0]).sum() - self.log("val corr", score) + try: + score = self.score(self.trainer.val_dataloaders[0]).sum() + self.log("val corr", score) + except: + # Should only be during sanity check + score = self.score(self.trainer.val_dataloaders[0], train=True).sum() + self.log("val corr", score) def correlations( self, diff --git a/cca_zoo/test/test_deepmodels.py b/cca_zoo/test/test_deepmodels.py index a1fc4dec..61c6c37b 100644 --- a/cca_zoo/test/test_deepmodels.py +++ b/cca_zoo/test/test_deepmodels.py @@ -85,7 +85,7 @@ def test_DCCA_methods(): trainer = pl.Trainer( max_epochs=epochs, log_every_n_steps=10, enable_checkpointing=False ) - trainer.fit(dcca, train_loader) + trainer.fit(dcca, train_loader, val_dataloaders=val_loader) assert ( np.testing.assert_array_less( cca.score((X, Y)).sum(), trainer.model.score(train_loader).sum() From 4c6361ff1899d02bb01f8cb36bca4d99fabcbc55 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Mon, 29 Nov 2021 15:19:16 +0000 Subject: [PATCH 43/45] fix problem with post_transform for deep models --- cca_zoo/data/toy.py | 82 +++++++++++++++-------- cca_zoo/deepmodels/__init__.py | 2 +- cca_zoo/deepmodels/dccae.py | 3 +- cca_zoo/models/innerloop.py | 7 +- cca_zoo/models/iterative.py | 10 +-- cca_zoo/test/test_models.py | 2 +- examples/plot_dcca.py | 20 +++--- examples/plot_dvcca.py | 22 +++--- examples/plot_hyperparameter_selection.py | 24 +++---- setup.py | 2 +- 10 files changed, 104 insertions(+), 70 deletions(-) diff --git a/cca_zoo/data/toy.py b/cca_zoo/data/toy.py index e91e79da..68b904e6 100644 --- a/cca_zoo/data/toy.py +++ b/cca_zoo/data/toy.py @@ -80,29 +80,42 @@ def __init__( :param flatten: whether to flatten the data into array or use 2d images """ if mnist_type == "MNIST": - self.dataset = datasets.MNIST("../../data", train=train, download=True, - transform=torchvision.transforms.Compose([ - torchvision.transforms.ToTensor()])) + self.dataset = datasets.MNIST( + "../../data", + train=train, + download=True, + transform=torchvision.transforms.Compose( + [torchvision.transforms.ToTensor()] + ), + ) elif 
mnist_type == "FashionMNIST":
            self.dataset = datasets.FashionMNIST(
-                "../../data", train=train, download=True, transform=torchvision.transforms.Compose([
-                    torchvision.transforms.ToTensor()]))
+                "../../data",
+                train=train,
+                download=True,
+                transform=torchvision.transforms.Compose(
+                    [torchvision.transforms.ToTensor()]
+                ),
+            )
         elif mnist_type == "KMNIST":
-            self.dataset = datasets.KMNIST("../../data", train=train, download=True,
-                                           transform=torchvision.transforms.Compose([
-                                               torchvision.transforms.ToTensor(),
-                                               transforms.Normalize((0.1307,), (0.3081,))]))
+            self.dataset = datasets.KMNIST(
+                "../../data",
+                train=train,
+                download=True,
+                transform=torchvision.transforms.Compose(
+                    [
+                        torchvision.transforms.ToTensor(),
+                        transforms.Normalize((0.1307,), (0.3081,)),
+                    ]
+                ),
+            )
         self.base_transform = transforms.ToTensor()
         self.a_transform = transforms.Compose(
-            [
-                torchvision.transforms.RandomRotation((-45, 45))
-            ]
+            [torchvision.transforms.RandomRotation((-45, 45))]
         )
         self.b_transform = transforms.Compose(
             [
@@ -149,22 +162,37 @@ def __init__(self, mnist_type="MNIST", train=True, flatten=True):
         :param flatten: whether to flatten the data into array or use 2d images
         """
         if mnist_type == "MNIST":
-            self.dataset = datasets.MNIST("../../data", train=train, download=True,
-                                          transform=torchvision.transforms.Compose([
-                                              torchvision.transforms.ToTensor()]))
+            self.dataset = datasets.MNIST(
+                "../../data",
+                train=train,
+                download=True,
+                transform=torchvision.transforms.Compose(
+                    [torchvision.transforms.ToTensor()]
+                ),
+            )
         elif mnist_type == "FashionMNIST":
             self.dataset = datasets.FashionMNIST(
-                "../../data", train=train, download=True, transform=torchvision.transforms.Compose([
-                    torchvision.transforms.ToTensor()]))
+                "../../data",
+                train=train,
+                download=True,
+                transform=torchvision.transforms.Compose(
+                    [torchvision.transforms.ToTensor()]
+                ),
+            )
         elif mnist_type == "KMNIST":
-            self.dataset = datasets.KMNIST("../../data", train=train, download=True,
-                                           transform=torchvision.transforms.Compose([
-                                               torchvision.transforms.ToTensor(),
-                                               transforms.Normalize((0.1307,), (0.3081,))]))
+            self.dataset = datasets.KMNIST(
+                "../../data",
+                train=train,
+                download=True,
+                transform=torchvision.transforms.Compose(
+                    [
+                        torchvision.transforms.ToTensor(),
+                        transforms.Normalize((0.1307,), (0.3081,)),
+                    ]
+                ),
+            )
         self.transform = transforms.Compose(
-            [
-                torchvision.transforms.RandomRotation((-45, 45))
-            ]
+            [torchvision.transforms.RandomRotation((-45, 45))]
         )
         self.targets = self.dataset.targets
         self.filtered_classes = []
diff --git a/cca_zoo/deepmodels/__init__.py b/cca_zoo/deepmodels/__init__.py
index bb50f89f..97d43dec 100644
--- a/cca_zoo/deepmodels/__init__.py
+++ b/cca_zoo/deepmodels/__init__.py
@@ -10,4 +10,4 @@
 from .dvcca import DVCCA
 from .splitae import SplitAE
 from .trainers import CCALightning
-from .utils import get_dataloaders, process_data
\ No newline at end of file
+from .utils import get_dataloaders, process_data
diff --git a/cca_zoo/deepmodels/dccae.py b/cca_zoo/deepmodels/dccae.py
index 499a17e0..054465a7 100644
--- a/cca_zoo/deepmodels/dccae.py
+++ b/cca_zoo/deepmodels/dccae.py
@@ -80,6 +80,7 @@ def loss(self, *args):
     @staticmethod
     def _recon_loss(x, recon):
         recons = [
-            F.binary_cross_entropy(recon_, x_, reduction="mean") for recon_, x_ in zip(recon, x)
+            F.binary_cross_entropy(recon_, x_, reduction="mean")
+            for recon_,
x_ in zip(recon, x) ] return torch.stack(recons).sum(dim=0) diff --git a/cca_zoo/models/innerloop.py b/cca_zoo/models/innerloop.py index 743278b8..39f31f7b 100644 --- a/cca_zoo/models/innerloop.py +++ b/cca_zoo/models/innerloop.py @@ -45,7 +45,10 @@ def _check_params(self): def _initialize(self): if self.initialization == "random": self.scores = np.array( - [self.random_state.normal(0, 1, size=(view.shape[0], 1)) for view in self.views] + [ + self.random_state.normal(0, 1, size=(view.shape[0], 1)) + for view in self.views + ] ) elif self.initialization == "uniform": self.scores = np.array([np.ones((view.shape[0], 1)) for view in self.views]) @@ -230,7 +233,7 @@ def _update_view(self, view_index: int): self.weights[view_index], self.c[view_index], positive=self.positive[view_index], - tol=self.tol + tol=self.tol, ) _check_converged_weights(self.weights[view_index], view_index) self.scores[view_index] = self.views[view_index] @ self.weights[view_index] diff --git a/cca_zoo/models/iterative.py b/cca_zoo/models/iterative.py index 20680b16..083359ef 100644 --- a/cca_zoo/models/iterative.py +++ b/cca_zoo/models/iterative.py @@ -119,7 +119,7 @@ def _deflate(self, residual, score, loading): elif self.deflation == "pls": return residual - np.outer(score, loading) else: - raise ValueError(f'deflation method {self.deflation} not implemented yet.') + raise ValueError(f"deflation method {self.deflation} not implemented yet.") @abstractmethod def _set_loop_params(self): @@ -187,7 +187,7 @@ def __init__( scale=scale, centre=centre, copy_data=copy_data, - deflation='pls', + deflation="pls", max_iter=max_iter, initialization=initialization, tol=tol, @@ -733,9 +733,9 @@ class SpanCCA(_Iterative): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, + self, + latent_dims: int = 1, + scale: bool = True, centre=True, copy_data=True, max_iter: int = 100, diff --git a/cca_zoo/test/test_models.py b/cca_zoo/test/test_models.py index a76d8a1e..e9e17d0f 100644 --- a/cca_zoo/test/test_models.py +++ b/cca_zoo/test/test_models.py @@ -305,7 +305,7 @@ def test_pls(): pls = PLS(latent_dims=3) pls_als.fit((X, Y)) pls.fit((X, Y)) - assert (np.allclose(np.abs(pls_als.weights[0]), np.abs(pls.weights[0]), rtol=1e-3)) + assert np.allclose(np.abs(pls_als.weights[0]), np.abs(pls.weights[0]), rtol=1e-3) def test_VCCA(): diff --git a/examples/plot_dcca.py b/examples/plot_dcca.py index 03448335..cd4ac539 100644 --- a/examples/plot_dcca.py +++ b/examples/plot_dcca.py @@ -26,16 +26,16 @@ def plot_latent_label(model, dataloader, num_batches=100): fig, ax = plt.subplots(ncols=model.latent_dims) for j in range(model.latent_dims): - ax[j].set_title(f'Dimension {j}') - ax[j].set_xlabel('View 1') - ax[j].set_ylabel('View 2') + ax[j].set_title(f"Dimension {j}") + ax[j].set_xlabel("View 1") + ax[j].set_ylabel("View 2") for i, (data, label) in enumerate(dataloader): z = model(*data) zx, zy = z - zx = zx.to('cpu').detach().numpy() - zy = zy.to('cpu').detach().numpy() + zx = zx.to("cpu").detach().numpy() + zy = zy.to("cpu").detach().numpy() for j in range(model.latent_dims): - ax[j].scatter(zx[:, j], zy[:, j], c=label.numpy(), cmap='tab10') + ax[j].scatter(zx[:, j], zy[:, j], c=label.numpy(), cmap="tab10") if i > num_batches: plt.colorbar() break @@ -63,7 +63,7 @@ def plot_latent_label(model, dataloader, num_batches=100): trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False) trainer.fit(dcca, train_loader, val_loader) plot_latent_label(dcca.model, train_loader) -plt.suptitle('DCCA') +plt.suptitle("DCCA") 
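# Each panel scatters the view-1 latent score against the view-2 latent score
# for one dimension, coloured by digit label, so well-correlated dimensions
# show up as tight diagonal structure.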
plt.show()

 # %%
@@ -75,7 +75,7 @@ def plot_latent_label(model, dataloader, num_batches=100):
 trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False)
 trainer.fit(dcca_noi, train_loader, val_loader)
 plot_latent_label(dcca_noi.model, train_loader)
-plt.title('DCCA by Non-Linear Orthogonal Iterations')
+plt.title("DCCA by Non-Linear Orthogonal Iterations")
 plt.show()

 # %%
@@ -87,7 +87,7 @@ def plot_latent_label(model, dataloader, num_batches=100):
 trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False)
 trainer.fit(dcca_sdl, train_loader, val_loader)
 plot_latent_label(dcca_sdl.model, train_loader)
-plt.title('DCCA by Stochastic Decorrelation')
+plt.title("DCCA by Stochastic Decorrelation")
 plt.show()

 # %%
@@ -97,5 +97,5 @@ def plot_latent_label(model, dataloader, num_batches=100):
 trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False)
 trainer.fit(dcca, train_loader, val_loader)
 plot_latent_label(dcca.model, train_loader)
-plt.title('DCCA by Barlow Twins')
+plt.title("DCCA by Barlow Twins")
 plt.show()
diff --git a/examples/plot_dvcca.py b/examples/plot_dvcca.py
index 62df7811..5dbb8b2e 100644
--- a/examples/plot_dvcca.py
+++ b/examples/plot_dvcca.py
@@ -30,10 +30,10 @@ def plot_reconstruction(model, dataset, idx):
     recon_x = recon_x.detach().numpy()
     recon_y = recon_y.detach().numpy()
     fig, ax = plt.subplots(ncols=4)
-    ax[0].set_title('Original View 1')
-    ax[1].set_title('Original View 2')
-    ax[2].set_title('Reconstruction View 1')
-    ax[3].set_title('Reconstruction View 2')
+    ax[0].set_title("Original View 1")
+    ax[1].set_title("Original View 2")
+    ax[2].set_title("Reconstruction View 1")
+    ax[3].set_title("Reconstruction View 2")
     ax[0].imshow(x.detach().numpy().reshape((28, 28)))
     ax[1].imshow(y.detach().numpy().reshape((28, 28)))
     ax[2].imshow(recon_x.reshape((28, 28)))
@@ -58,8 +58,12 @@ def plot_reconstruction(model, dataset, idx):
 encoder_2 = architectures.Encoder(
     latent_dims=latent_dims, feature_size=784, variational=True
 )
-decoder_1 = architectures.Decoder(latent_dims=latent_dims, feature_size=784, norm_output=True)
-decoder_2 = architectures.Decoder(latent_dims=latent_dims, feature_size=784, norm_output=True)
+decoder_1 = architectures.Decoder(
+    latent_dims=latent_dims, feature_size=784, norm_output=True
+)
+decoder_2 = architectures.Decoder(
+    latent_dims=latent_dims, feature_size=784, norm_output=True
+)

 # %%
 # Deep VCCA
@@ -72,7 +76,7 @@ def plot_reconstruction(model, dataset, idx):
 trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False)
 trainer.fit(dcca, train_loader, val_loader)
 plot_reconstruction(dcca.model, train_dataset, 0)
-plt.suptitle('DVCCA')
+plt.suptitle("DVCCA")
 plt.show()

 # %%
@@ -96,7 +100,7 @@ def plot_reconstruction(model, dataset, idx):
 trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False)
 trainer.fit(dcca, train_loader, val_loader)
 plot_reconstruction(dcca.model, train_dataset, 0)
-plt.suptitle('DVCCA Private')
+plt.suptitle("DVCCA Private")
 plt.show()

 # %%
@@ -112,5 +116,5 @@ def plot_reconstruction(model, dataset, idx):
 trainer = pl.Trainer(max_epochs=epochs, enable_checkpointing=False)
 trainer.fit(dcca, train_loader, val_loader)
 plot_reconstruction(dcca.model, train_dataset, 0)
-plt.suptitle('DCCAE')
+plt.suptitle("DCCAE")
 plt.show()
diff --git a/examples/plot_hyperparameter_selection.py b/examples/plot_hyperparameter_selection.py
index 1aad1b66..6b4ea27e 100644
--- a/examples/plot_hyperparameter_selection.py
+++ b/examples/plot_hyperparameter_selection.py
@@ -38,12 +38,9 @@
 # %%
 param_grid = 
{"kernel": ["poly"], "c": [[1e-1], [1e-1, 2e-1]], "degree": [[2], [2, 3]]} -kernel_reg = ( - GridSearchCV( - KCCA(latent_dims=latent_dims), param_grid=param_grid, cv=cv, verbose=True - ) - .fit([X, Y]) -) +kernel_reg = GridSearchCV( + KCCA(latent_dims=latent_dims), param_grid=param_grid, cv=cv, verbose=True +).fit([X, Y]) print(pd.DataFrame(kernel_reg.cv_results_)) # %% @@ -52,11 +49,12 @@ # With Randomized Search we can additionally use distributions from scikit-learn to define the parameter search space # %% -param_grid = {"kernel": ["poly"], "c": [loguniform(1e-1, 2e-1), [1e-1]], "degree": [[2], [2, 3]]} -kernel_reg = ( - RandomizedSearchCV( - KCCA(latent_dims=latent_dims), param_distributions=param_grid, cv=cv, verbose=True - ) - .fit([X, Y]) -) +param_grid = { + "kernel": ["poly"], + "c": [loguniform(1e-1, 2e-1), [1e-1]], + "degree": [[2], [2, 3]], +} +kernel_reg = RandomizedSearchCV( + KCCA(latent_dims=latent_dims), param_distributions=param_grid, cv=cv, verbose=True +).fit([X, Y]) print(pd.DataFrame(kernel_reg.cv_results_)) diff --git a/setup.py b/setup.py index 8b6d1f29..21bf8a44 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ EXTRA_PACKAGES["deep"] = f.read() with open("./requirements/probabilistic.txt", "r") as f: EXTRA_PACKAGES["probabilistic"] = f.read() -EXTRA_PACKAGES["all"] = EXTRA_PACKAGES["deep"]+'\n'+EXTRA_PACKAGES["probabilistic"] +EXTRA_PACKAGES["all"] = EXTRA_PACKAGES["deep"] + "\n" + EXTRA_PACKAGES["probabilistic"] setup( name="cca_zoo", From 1da3cb3616f6b27fc1b7a889dc047b67976574f0 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 1 Dec 2021 17:55:42 +0000 Subject: [PATCH 44/45] Initialization change for iterative methods --- cca_zoo/models/innerloop.py | 84 ++++++++----------------- cca_zoo/models/iterative.py | 119 +++++++++++++++++++++++------------- cca_zoo/test/test_models.py | 24 ++++---- 3 files changed, 117 insertions(+), 110 deletions(-) diff --git a/cca_zoo/models/innerloop.py b/cca_zoo/models/innerloop.py index 39f31f7b..8d70b8da 100644 --- a/cca_zoo/models/innerloop.py +++ b/cca_zoo/models/innerloop.py @@ -23,7 +23,7 @@ def __init__( self, max_iter: int = 100, tol: float = 1e-9, - initialization: str = "unregularized", + initial_scores: np.ndarray = None, random_state=None, ): """ @@ -31,7 +31,7 @@ def __init__( :param tol: tolerance value used for stopping criteria :param initialization: initialise the optimisation with either the 'unregularized' (CCA/PLS) solution, or a 'random' initialisation """ - self.initialization = initialization + self.scores = initial_scores self.max_iter = max_iter self.tol = tol self.random_state = check_random_state(random_state) @@ -42,47 +42,13 @@ def _check_params(self): """ pass - def _initialize(self): - if self.initialization == "random": - self.scores = np.array( - [ - self.random_state.normal(0, 1, size=(view.shape[0], 1)) - for view in self.views - ] - ) - elif self.initialization == "uniform": - self.scores = np.array([np.ones((view.shape[0], 1)) for view in self.views]) - elif self.initialization == "unregularized": - self.scores = ( - PLSInnerLoop( - initialization="random", - random_state=self.random_state, - tol=self.tol, - ) - ._fit(*self.views) - .scores - ) - elif callable(self.initialization): - self.scores = next(self.initialization()) - else: - raise ValueError("initialize must be random, uniform or unregularized") - self.scores = ( - self.scores - * np.sqrt(self.n - 1) - / np.linalg.norm(self.scores, axis=1)[:, np.newaxis] - ) - self.weights = [ - self.random_state.randn(view.shape[1], 1) for 
view in self.views - ] - def _fit(self, *views: np.ndarray): self.views = views self.n = views[0].shape[0] # Check that the parameters that have been passed are valid for these views given #views and #features self._check_params() - self._initialize() - + self.weights = [self.random_state.randn(view.shape[1]) for view in self.views] self.track = {} self.track["converged"] = False # Iterate until convergence @@ -127,13 +93,13 @@ def __init__( self, max_iter: int = 100, tol=1e-9, - initialization: str = "unregularized", + initial_scores: np.ndarray = None, random_state=None, ): super().__init__( max_iter=max_iter, tol=tol, - initialization=initialization, + initial_scores=initial_scores, random_state=random_state, ) @@ -164,7 +130,9 @@ def _update_view(self, view_index: int): self.views[view_index].T @ targets.sum(axis=0).filled() ) self.weights[view_index] /= np.linalg.norm(self.weights[view_index]) - self.scores[view_index] = self.views[view_index] @ self.weights[view_index] + self.scores[view_index] = self.views[view_index] @ np.squeeze( + np.array(self.weights[view_index]) + ) def _early_stop(self) -> bool: # Some kind of early stopping @@ -182,7 +150,7 @@ def __init__( self, max_iter: int = 100, tol=1e-9, - initialization: str = "unregularized", + initial_scores: np.ndarray = None, c=None, positive=None, random_state=None, @@ -190,7 +158,7 @@ def __init__( super().__init__( max_iter=max_iter, tol=tol, - initialization=initialization, + initial_scores=initial_scores, random_state=random_state, ) self.c = c @@ -244,14 +212,14 @@ def __init__( self, max_iter: int = 100, tol=1e-9, - initialization: str = "unregularized", + initial_scores: np.ndarray = None, c=None, random_state=None, ): super().__init__( max_iter=max_iter, tol=tol, - initialization=initialization, + initial_scores=initial_scores, random_state=random_state, ) self.c = c @@ -283,7 +251,7 @@ def __init__( self, max_iter: int = 100, tol=1e-9, - initialization: str = "unregularized", + initial_scores: np.ndarray = None, c=None, l1_ratio=None, maxvar=True, @@ -294,7 +262,7 @@ def __init__( super().__init__( max_iter=max_iter, tol=tol, - initialization=initialization, + initial_scores=initial_scores, random_state=random_state, ) self.stochastic = stochastic @@ -363,7 +331,7 @@ def _update_view(self, view_index: int): @ignore_warnings(category=ConvergenceWarning) def _elastic_solver(self, X, y, view_index): - return np.expand_dims(self.regressors[view_index].fit(X, y.ravel()).coef_, 1) + return self.regressors[view_index].fit(X, y.ravel()).coef_ def _objective(self): views = len(self.views) @@ -397,7 +365,7 @@ def __init__( self, max_iter: int = 100, tol=1e-9, - initialization: str = "unregularized", + initial_scores: np.ndarray = None, mu=None, lam=None, c=None, @@ -407,7 +375,7 @@ def __init__( super().__init__( max_iter=max_iter, tol=tol, - initialization=initialization, + initial_scores=initial_scores, random_state=random_state, ) self.c = c @@ -433,9 +401,9 @@ def _check_params(self): _check_Parikh2014(self.mu, self.lam, self.views) self.eta = [ - np.ones((view.shape[0], 1)) * eta for view, eta in zip(self.views, self.eta) + np.ones(view.shape[0]) * eta for view, eta in zip(self.views, self.eta) ] - self.z = [np.zeros((view.shape[0], 1)) for view in self.views] + self.z = [np.zeros(view.shape[0]) for view in self.views] self.l1_ratio = [1] * len(self.views) def _update_view(self, view_index: int): @@ -516,7 +484,7 @@ def __init__( self, max_iter: int = 100, tol=1e-9, - initialization: str = "unregularized", + initial_scores: 
np.ndarray = None, c=None, regularisation="l0", rank=1, @@ -526,7 +494,7 @@ def __init__( super().__init__( max_iter=max_iter, tol=tol, - initialization=initialization, + initial_scores=initial_scores, random_state=random_state, ) self.c = c @@ -556,7 +524,7 @@ def _check_params(self): ) def _inner_iteration(self): - c = self.random_state.randn(self.rank, 1) + c = self.random_state.randn(self.rank) c /= np.linalg.norm(c) a = self.P @ np.diag(self.D) @ c u = self.update(a, self.c[0]) @@ -577,7 +545,7 @@ def __init__( self, max_iter: int = 100, tol=1e-9, - initialization: str = "unregularized", + initial_scores: np.ndarray = None, regularisation="l0", c=None, sample_support: int = None, @@ -587,7 +555,7 @@ def __init__( super().__init__( max_iter=max_iter, tol=tol, - initialization=initialization, + initial_scores=initial_scores, random_state=random_state, ) self.c = c @@ -601,7 +569,7 @@ def __init__( def _check_params(self): if self.sample_support is None: self.sample_support = self.views[0].shape[0] - self.sample_weights = np.ones((self.views[0].shape[0], 1)) + self.sample_weights = np.ones(self.views[0].shape[0]) self.sample_weights /= np.linalg.norm(self.sample_weights) self.c = _process_parameter("c", self.c, 2, len(self.views)) self.positive = _process_parameter( @@ -616,7 +584,7 @@ def _update_view(self, view_index: int): targets = np.ma.array(self.scores, mask=False) targets.mask[view_index] = True self.weights[view_index] = ( - self.views[view_index] * self.sample_weights + self.views[view_index] * self.sample_weights[:, np.newaxis] ).T @ targets.sum(axis=0).filled() self.weights[view_index] = self.update( self.weights[view_index], diff --git a/cca_zoo/models/iterative.py b/cca_zoo/models/iterative.py index 083359ef..5be78a6a 100644 --- a/cca_zoo/models/iterative.py +++ b/cca_zoo/models/iterative.py @@ -5,6 +5,7 @@ import numpy as np +from cca_zoo.models import PLS, CCA from cca_zoo.models.cca_base import _CCA_Base from cca_zoo.models.innerloop import ( PLSInnerLoop, @@ -25,16 +26,16 @@ class _Iterative(_CCA_Base): """ def __init__( - self, - latent_dims: int = 1, - scale: bool = True, - centre=True, - copy_data=True, - random_state=None, - deflation="cca", - max_iter: int = 100, - initialization: str = "unregularized", - tol: float = 1e-9, + self, + latent_dims: int = 1, + scale: bool = True, + centre=True, + copy_data=True, + random_state=None, + deflation="cca", + max_iter: int = 100, + initialization: Union[str, callable] = "random", + tol: float = 1e-9, ): """ Constructor for _Iterative @@ -56,12 +57,12 @@ def __init__( centre=centre, copy_data=copy_data, accept_sparse=["csc", "csr"], + random_state=random_state, ) self.max_iter = max_iter self.initialization = initialization self.tol = tol self.deflation = deflation - self.random_state = random_state def fit(self, views: Iterable[np.ndarray], y=None, **kwargs): """ @@ -75,7 +76,8 @@ def fit(self, views: Iterable[np.ndarray], y=None, **kwargs): views = self._centre_scale(views) self.n_views = len(views) self.n = views[0].shape[0] - self._set_loop_params() + if isinstance(self.initialization, str): + self.initialization = self._initialization(views, self.initialization) n = views[0].shape[0] p = [view.shape[1] for view in views] # List of d: p x k @@ -86,10 +88,10 @@ def fit(self, views: Iterable[np.ndarray], y=None, **kwargs): self.scores = [np.zeros((n, self.latent_dims)) for _ in views] residuals = copy.deepcopy(list(views)) - self.track = [] # For each of the dimensions for k in range(self.latent_dims): + 
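            # One inner optimisation runs per latent dimension: the loop (and the
            # initial scores drawn from the initialization generator) is refreshed on
            # each pass, fitted on the deflated residuals, and the residuals are then
            # deflated again so later dimensions stay orthogonal in the chosen sense.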
self._set_loop_params()
             self.loop = self.loop._fit(*residuals)
             for i, residual in enumerate(residuals):
                 self.weights[i][:, k] = self.loop.weights[i].ravel()
@@ -128,10 +130,45 @@ def _set_loop_params(self):
         """
         self.loop = PLSInnerLoop(
             max_iter=self.max_iter,
-            initialization=self.initialization,
+            initial_scores=next(self.initialization),
             random_state=self.random_state,
         )

+    def _initialization(self, views, initialization):
+        if initialization == "random":
+            while True:
+                yield np.array(
+                    [
+                        self.random_state.normal(0, 1, size=(view.shape[0]))
+                        for view in views
+                    ]
+                )
+        elif initialization == "uniform":
+            while True:
+                yield np.array([np.ones(view.shape[0]) for view in views])
+        elif initialization == "pls":
+            latent_dim = 0
+            pls_scores = PLS(self.latent_dims).fit_transform(views)
+            while True:
+                yield np.stack(
+                    (pls_scores[0][:, latent_dim], pls_scores[1][:, latent_dim])
+                )
+                latent_dim += 1
+        elif initialization == "cca":
+            latent_dim = 0
+            cca_scores = CCA(self.latent_dims).fit_transform(views)
+            while True:
+                yield np.stack(
+                    (cca_scores[0][:, latent_dim], cca_scores[1][:, latent_dim])
+                )
+                latent_dim += 1
+        else:
+            raise ValueError(
+                f"Initialization {initialization} not supported. Pass a generator implementing this method"
+            )

 class PLS_ALS(_Iterative):
     r"""
@@ -167,7 +204,7 @@ def __init__(
         copy_data=True,
         random_state=None,
         max_iter: int = 100,
-        initialization: str = "unregularized",
+        initialization: Union[str, callable] = "random",
        tol: float = 1e-9,
     ):
         """
@@ -197,7 +234,7 @@ def __init__(
     def _set_loop_params(self):
         self.loop = PLSInnerLoop(
             max_iter=self.max_iter,
-            initialization=self.initialization,
+            initial_scores=next(self.initialization),
             tol=self.tol,
             random_state=self.random_state,
         )
@@ -232,21 +269,21 @@ class ElasticCCA(_Iterative):
     """

     def __init__(
-            self,
-            latent_dims: int = 1,
-            scale: bool = True,
-            centre=True,
-            copy_data=True,
-            random_state=None,
-            deflation="cca",
-            max_iter: int = 100,
-            initialization: str = "unregularized",
-            tol: float = 1e-9,
-            c: Union[Iterable[float], float] = None,
-            l1_ratio: Union[Iterable[float], float] = None,
-            maxvar: bool = True,
-            stochastic=False,
-            positive: Union[Iterable[bool], bool] = None,
+        self,
+        latent_dims: int = 1,
+        scale: bool = True,
+        centre=True,
+        copy_data=True,
+        random_state=None,
+        deflation="cca",
+        max_iter: int = 100,
+        initialization: Union[str, callable] = "pls",
+        tol: float = 1e-9,
+        c: Union[Iterable[float], float] = None,
+        l1_ratio: Union[Iterable[float], float] = None,
+        maxvar: bool = True,
+        stochastic=False,
+        positive: Union[Iterable[bool], bool] = None,
     ):
         """
         Constructor for ElasticCCA
@@ -294,7 +331,7 @@ def _set_loop_params(self):
             c=self.c,
             l1_ratio=self.l1_ratio,
             maxvar=self.maxvar,
-            initialization=self.initialization,
+            initial_scores=next(self.initialization),
             tol=self.tol,
             stochastic=self.stochastic,
             positive=self.positive,
@@ -415,7 +452,7 @@ def __init__(
         c: Union[Iterable[float], float] = None,
         max_iter: int = 100,
         maxvar: bool = False,
-        initialization: str = "unregularized",
+        initialization: Union[str, callable] = "pls",
         tol: float = 1e-9,
         stochastic=False,
         positive: Union[Iterable[bool], bool] = None,
@@ -494,7 +531,7 @@ def __init__(
         deflation="cca",
         c: Union[Iterable[float], float] = None,
         max_iter: int = 100,
-        initialization: str = "unregularized",
+        initialization: Union[str, callable] = "pls",
         tol: float = 1e-9,
         positive: Union[Iterable[bool], bool] = None,
     ):
@@ -530,7 +567,7 @@ def
_set_loop_params(self): self.loop = PMDInnerLoop( max_iter=self.max_iter, c=self.c, - initialization=self.initialization, + initial_scores=next(self.initialization), tol=self.tol, positive=self.positive, random_state=self.random_state, @@ -575,7 +612,7 @@ def __init__( deflation="cca", c: Union[Iterable[float], float] = None, max_iter: int = 100, - initialization: str = "unregularized", + initialization: Union[str, callable] = "pls", tol: float = 1e-9, ): """ @@ -608,7 +645,7 @@ def _set_loop_params(self): self.loop = ParkhomenkoInnerLoop( max_iter=self.max_iter, c=self.c, - initialization=self.initialization, + initial_scores=next(self.initialization), tol=self.tol, random_state=self.random_state, ) @@ -655,7 +692,7 @@ def __init__( lam: Union[Iterable[float], float] = None, eta: Union[Iterable[float], float] = None, max_iter: int = 100, - initialization: str = "unregularized", + initialization: Union[str, callable] = "pls", tol: float = 1e-9, ): """ @@ -697,7 +734,7 @@ def _set_loop_params(self): mu=self.mu, lam=self.lam, eta=self.eta, - initialization=self.initialization, + initial_scores=next(self.initialization), tol=self.tol, random_state=self.random_state, ) @@ -783,7 +820,7 @@ def _set_loop_params(self): self.loop = SpanCCAInnerLoop( max_iter=self.max_iter, c=self.c, - initialization=self.initialization, + initial_scores=next(self.initialization), tol=self.tol, regularisation=self.regularisation, rank=self.rank, @@ -861,7 +898,7 @@ def __init__( def _set_loop_params(self): self.loop = SWCCAInnerLoop( max_iter=self.max_iter, - initialization=self.initialization, + initial_scores=next(self.initialization), tol=self.tol, regularisation=self.regularisation, c=self.c, diff --git a/cca_zoo/test/test_models.py b/cca_zoo/test/test_models.py index e9e17d0f..89a72066 100644 --- a/cca_zoo/test/test_models.py +++ b/cca_zoo/test/test_models.py @@ -40,12 +40,8 @@ def test_unregularized_methods(): # Tests unregularized CCA methods. The idea is that all of these should give the same result. 
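    # (With two views and no regularisation, CCA, CCA by alternating least squares,
    # GCCA, MCCA, KCCA with a linear kernel and TCCA all maximise the same
    # correlation objective, so their canonical correlations should agree to the
    # tested precision.)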
latent_dims = 2
     cca = CCA(latent_dims=latent_dims).fit([X, Y])
-    iter = CCA_ALS(
-        latent_dims=latent_dims, tol=1e-9, random_state=rng, stochastic=False
-    ).fit([X, Y])
-    iter_pls = PLS_ALS(
-        latent_dims=latent_dims, tol=1e-9, initialization="unregularized", centre=False
-    ).fit([X, Y])
+    iter = CCA_ALS(latent_dims=latent_dims, tol=1e-9, stochastic=False).fit([X, Y])
+    iter_pls = PLS_ALS(latent_dims=latent_dims, tol=1e-9, centre=False).fit([X, Y])
     gcca = GCCA(latent_dims=latent_dims).fit([X, Y])
     mcca = MCCA(latent_dims=latent_dims, eps=1e-9).fit([X, Y])
     kcca = KCCA(latent_dims=latent_dims).fit([X, Y])
@@ -125,9 +121,9 @@ def test_sparse_input():
     iter = CCA_ALS(
         latent_dims=latent_dims, tol=1e-9, stochastic=False, centre=False
     ).fit((X_sp, Y_sp))
-    iter_pls = PLS_ALS(
-        latent_dims=latent_dims, tol=1e-9, initialization="unregularized", centre=False
-    ).fit((X_sp, Y_sp))
+    iter_pls = PLS_ALS(latent_dims=latent_dims, tol=1e-9, centre=False).fit(
+        (X_sp, Y_sp)
+    )
     gcca = GCCA(latent_dims=latent_dims, centre=False).fit((X_sp, Y_sp))
     mcca = MCCA(latent_dims=latent_dims, centre=False).fit((X_sp, Y_sp))
     kcca = KCCA(latent_dims=latent_dims, centre=False).fit((X_sp, Y_sp))
@@ -190,15 +186,21 @@ def test_regularized_methods():
 def test_non_negative_methods():
     latent_dims = 2
     nnscca = SCCA(
-        latent_dims=latent_dims, tol=1e-9, positive=True, c=[1e-1, 1e-1], random_state=0
+        latent_dims=latent_dims,
+        tol=1e-9,
+        positive=True,
+        c=[1e-6, 1e-6],
+        random_state=0,
+        initialization="random",
     ).fit((X, Y))
     nnelastic = ElasticCCA(
         latent_dims=latent_dims,
         tol=1e-9,
         positive=True,
         l1_ratio=[0.5, 0.5],
-        c=[1e-4, 1e-5],
+        c=[1e-5, 1e-5],
         random_state=0,
+        initialization="random",
     ).fit([X, Y])
     nnals = CCA_ALS(
         latent_dims=latent_dims, tol=1e-9, positive=True, random_state=0

From 79b993a1d85133ea4068e4deb6d90889bc2ea148 Mon Sep 17 00:00:00 2001
From: James Chapman
Date: Wed, 1 Dec 2021 18:28:10 +0000
Subject: [PATCH 45/45] Initialization change for iterative methods

---
 cca_zoo/models/innerloop.py | 16 +++++-----------
 cca_zoo/test/test_models.py |  4 ++--
 2 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/cca_zoo/models/innerloop.py b/cca_zoo/models/innerloop.py
index 8d70b8da..fc6ea67b 100644
--- a/cca_zoo/models/innerloop.py
+++ b/cca_zoo/models/innerloop.py
@@ -170,18 +170,12 @@ def _check_params(self):
                 "c parameter not set. Setting to c=1 i.e. maximum regularisation of l1 norm"
             )
         self.c = _process_parameter("c", self.c, 1, len(self.views))
-        if any(c < 1 for c in self.c):
+        if any(c < 0 or c > 1 for c in self.c):
             raise ValueError(
-                "All regulariation parameters should be at least " f"1. c=[{self.c}]"
+                "All regularisation parameters should be between 0 and 1. " f"c=[{self.c}]"
             )
         shape_sqrts = [np.sqrt(view.shape[1]) for view in self.views]
-        if any(c > shape_sqrt for c, shape_sqrt in zip(self.c, shape_sqrts)):
-            raise ValueError(
-                "All regulariation parameters should be less than"
-                " the square root of number of the respective"
-                f" view. 
c=[{self.c}], limit of each view: " - f"{shape_sqrts}" - ) + self.t = [max(1, x * y) for x, y in zip(self.c, shape_sqrts)] self.positive = _process_parameter( "positive", self.positive, False, len(self.views) ) @@ -199,7 +193,7 @@ def _update_view(self, view_index: int): ) self.weights[view_index] = _delta_search( self.weights[view_index], - self.c[view_index], + self.t[view_index], positive=self.positive[view_index], tol=self.tol, ) @@ -674,7 +668,7 @@ def _delta_search(w, c, positive=False, init=0, tol=1e-9): current, previous, current_val, previous_val, min_, max_ ) previous_val = current_val - if np.abs(current_val) < tol or np.abs(max_ - min_) < tol or i == 50: + if np.abs(current_val) < tol or np.abs(max_ - min_) < tol or i == 150: converged = True return coef diff --git a/cca_zoo/test/test_models.py b/cca_zoo/test/test_models.py index 89a72066..572b96e0 100644 --- a/cca_zoo/test/test_models.py +++ b/cca_zoo/test/test_models.py @@ -214,8 +214,8 @@ def test_non_negative_methods(): def test_sparse_methods(): - c1 = [1, 3] - c2 = [1, 3] + c1 = [0.1, 0.3] + c2 = [0.1, 0.3] param_grid = {"c": [c1, c2]} pmd_cv = GridSearchCV(PMD(random_state=rng), param_grid=param_grid).fit([X, Y]) cv_plot(pmd_cv.cv_results_)
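
Taken together, the last two patches change the public interface of the iterative models in two ways: `initialization` now accepts either a string ("random", "uniform", "pls" or "cca") or a generator yielding one set of starting scores per latent dimension, and the PMD `c` values are now fractions between 0 and 1 that are rescaled internally to an l1 budget of t_i = max(1, c_i * sqrt(p_i)). The sketch below shows how a custom initialization generator could plug into the new interface; the toy data, the constant starting scores and the chosen `c` values are illustrative assumptions rather than anything taken from the patches.

import numpy as np

from cca_zoo.models import PMD

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 10))
Y = rng.standard_normal((100, 12))


def constant_scores():
    # Mirror the built-in options: yield an array holding one vector of
    # starting scores per view, once for every latent dimension requested.
    while True:
        yield np.array([np.ones(X.shape[0]), np.ones(Y.shape[0])])


# c is interpreted per view as a fraction of sqrt(n_features); PMD turns it
# into the l1 budget t_i = max(1, c_i * sqrt(p_i)) before the delta search.
pmd = PMD(latent_dims=2, c=[0.5, 0.5], initialization=constant_scores()).fit((X, Y))

Because `_set_loop_params` is now called once per deflation step, the generator is advanced once per latent dimension, which is what lets the "pls" and "cca" options hand each inner loop the scores of the matching component.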