From 64afb936214d267388c54006ffeeef76ce52421e Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Mon, 25 Mar 2024 16:43:48 +0100 Subject: [PATCH 1/9] always use current year for docs footer --- docs/src/conf.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/src/conf.py b/docs/src/conf.py index 507b5601..7e80773e 100644 --- a/docs/src/conf.py +++ b/docs/src/conf.py @@ -1,3 +1,6 @@ +from datetime import datetime + + # Add any Sphinx extension module names here, as strings. extensions = [ "sphinx.ext.viewcode", @@ -9,7 +12,11 @@ exclude_patterns = ["_build"] project = "cosmo-software-cookbook" -copyright = "BSD 3-Clause License, Copyright (c) 2023, COSMO software cookbook team" +copyright = ( + "BSD 3-Clause License, " + f"Copyright (c) {datetime.now().date().year}, " + "COSMO software cookbook team" +) htmlhelp_basename = "COSMO software-cookbook" html_theme = "furo" From 1528bd75e1b2074b29536eac720d7cd69535b256 Mon Sep 17 00:00:00 2001 From: Luthaf Date: Tue, 26 Mar 2024 11:55:16 +0100 Subject: [PATCH 2/9] Re-enable cp2k example --- .github/workflows/docs.yml | 2 +- .../batch-cp2k/.gitignore | 2 +- .../batch-cp2k/README.rst | 0 .../batch-cp2k/environment.yml | 2 + .../batch-cp2k/example.xyz | 0 .../batch-cp2k/reference-trajectory.py | 109 +++++++----------- .../batch-cp2k/reftraj_template.cp2k | 10 +- .../batch-cp2k/run_calcs.sh | 2 +- 8 files changed, 51 insertions(+), 76 deletions(-) rename {deactivated => examples}/batch-cp2k/.gitignore (65%) rename {deactivated => examples}/batch-cp2k/README.rst (100%) rename {deactivated => examples}/batch-cp2k/environment.yml (74%) rename {deactivated => examples}/batch-cp2k/example.xyz (100%) rename {deactivated => examples}/batch-cp2k/reference-trajectory.py (78%) rename {deactivated => examples}/batch-cp2k/reftraj_template.cp2k (91%) rename {deactivated => examples}/batch-cp2k/run_calcs.sh (78%) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 6cb696d9..a90e7fa8 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -19,7 +19,7 @@ jobs: - roy-gch - sample-selection - gaas-map - # - batch-cp2k + - batch-cp2k steps: - uses: actions/checkout@v4 diff --git a/deactivated/batch-cp2k/.gitignore b/examples/batch-cp2k/.gitignore similarity index 65% rename from deactivated/batch-cp2k/.gitignore rename to examples/batch-cp2k/.gitignore index 9fc7d688..fdb6d2b3 100644 --- a/deactivated/batch-cp2k/.gitignore +++ b/examples/batch-cp2k/.gitignore @@ -1,5 +1,5 @@ production/ +parameters/ cp2k.out cp2k.inp -cp2k_shell.ssmp diff --git a/deactivated/batch-cp2k/README.rst b/examples/batch-cp2k/README.rst similarity index 100% rename from deactivated/batch-cp2k/README.rst rename to examples/batch-cp2k/README.rst diff --git a/deactivated/batch-cp2k/environment.yml b/examples/batch-cp2k/environment.yml similarity index 74% rename from deactivated/batch-cp2k/environment.yml rename to examples/batch-cp2k/environment.yml index 980056b0..fafbc008 100644 --- a/deactivated/batch-cp2k/environment.yml +++ b/examples/batch-cp2k/environment.yml @@ -1,3 +1,5 @@ +channels: + - conda-forge dependencies: - python=3.11 - pip diff --git a/deactivated/batch-cp2k/example.xyz b/examples/batch-cp2k/example.xyz similarity index 100% rename from deactivated/batch-cp2k/example.xyz rename to examples/batch-cp2k/example.xyz diff --git a/deactivated/batch-cp2k/reference-trajectory.py b/examples/batch-cp2k/reference-trajectory.py similarity index 78% rename from deactivated/batch-cp2k/reference-trajectory.py rename to 
examples/batch-cp2k/reference-trajectory.py index b7293622..9d713ad4 100644 --- a/deactivated/batch-cp2k/reference-trajectory.py +++ b/examples/batch-cp2k/reference-trajectory.py @@ -8,34 +8,46 @@ using `CP2K `_ using its `reftraj functionality `_. The inputs are a set of structures in :download:`example.xyz` using the DFT parameters defined in -:download:`reftraj_template.cp2k` importing basis set and pseudopotentials from the -local CP2K installation. The reference DFT parameters are taken from `Cheng et al. Ab -initio thermodynamics of liquid and solid water 2019 +:download:`reftraj_template.cp2k`. The reference DFT parameters are taken from `Cheng et +al. Ab initio thermodynamics of liquid and solid water 2019 `_. Due to the small size of the test structure and convergence issues, we have decreased the size of the ``CUTOFF_RADIUS`` from :math:`6.0\,\mathrm{Å}` to :math:`3.0\,\mathrm{Å}`. For actual production calculations adapt the template! - -To run this example, we use a bare executable called with ``cp2k``. If you want to use -another version you can either adjust the the names within this example or link your -binary with a different name to ``cp2k``. """ # %% # We start the example by importing the required packages. - import os -import re +import platform import subprocess -from os.path import basename, splitext from typing import List, Union import ase.io import ase.visualize.plot import matplotlib.pyplot as plt import numpy as np -from ase.calculators.cp2k import CP2K +import requests + + +# %% +# +# Install CP2K +# ------------ +# +# We'll need a working installation of cp2k. The best way to do so depends on your +# platform, here are some possible solutions, but feel free to replace them with another +# installation method. + +if platform.system() == "Linux": + # use conda on Linux + subprocess.run(["conda", "install", "cp2k", "-c", "conda-forge", "-y"], check=True) +elif platform.system() == "Darwin": + # use homebrew on macOS + subprocess.run(["brew", "install", "cp2k"], check=True) +else: + print("no known way to install cp2k, skipping installation") # %% @@ -136,17 +148,25 @@ def write_cp2k_in( # %% +# +# We will now download basis set files from CP2K website. Depending on your CP2K +# installation, this might not be necessary! -def mkdir_force(*args, **kwargs) -> None: - """Warpper to ``os.mkdir``. +def download_parameter(file): + path = os.path.join("parameters", file) + + if not os.path.exists(path): + url = f"https://raw.githubusercontent.com/cp2k/cp2k/master/data/{file}" + response = requests.get(url) + response.raise_for_status() + with open(path, "wb") as f: + f.write(response.content) - The function does not raise an error if the directory already exists. - """ - try: - os.mkdir(*args, **kwargs) - except OSError: - pass + +os.makedirs("parameters", exist_ok=True) +for file in ["GTH_BASIS_SETS", "BASIS_ADMM", "POTENTIAL", "dftd3.dat", "t_c_g.dat"]: + download_parameter(file) # %% @@ -200,11 +220,9 @@ def mkdir_force(*args, **kwargs) -> None: # directory named ``H4O2`` because our dataset consists only of a single structure with # two water molecules. 
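# %%
# As a quick aside on where the ``H4O2`` label comes from: the stoichiometry
# string of a structure can be obtained directly from ASE, as in the short
# sketch below. This is only an illustration; the example script builds its
# ``frames_dict`` mapping of stoichiometries to frames elsewhere in the file,
# and ``water_dimer`` here is just a throwaway structure for the demonstration.

import ase.build

water_dimer = ase.build.molecule("H2O") + ase.build.molecule("H2O")
print(water_dimer.get_chemical_formula())  # prints "H4O2"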
-mkdir_force(project_directory) - for stoichiometry, frames in frames_dict.items(): current_directory = f"{project_directory}/{stoichiometry}" - mkdir_force(current_directory) + os.makedirs(current_directory, exist_ok=True) write_cp2k_in( f"{current_directory}/in.cp2k", @@ -281,50 +299,5 @@ def mkdir_force(*args, **kwargs) -> None: new_frames += frames_dft -new_fname = f"{splitext(basename(write_to_file))[0]}_dft.xyz" +new_fname = f"{os.path.splitext(os.path.basename(write_to_file))[0]}_dft.xyz" ase.io.write(f"{project_directory}/{new_fname}", new_frames) - -# %% -# Perform calculations using ASE calculator -# ----------------------------------------- -# Above we performed the calculations using an external bash script. ASE also provides a -# calculator class that we can use the perform the calculations with our input file -# without a detour of writing files to disk. -# -# To use the ASE calculator together with a custom input script this requires some -# adjustments. First the name of the executable that has the exact name ``cp2k_shell``. -# We create a symlink to follow this requirement. - -# %% -# Next, we load the input file abd remove ``GLOBAL`` section because from it - -inp = open("./production/H4O2/in.cp2k", "r").read() -inp = re.sub( - f"{re.escape('&GLOBAL')}.*?{re.escape('&END GLOBAL')}", "", inp, flags=re.DOTALL -) - -# %% -# Afterwards we define the :py:class:`ase.calculators.cp2k.CP2K`` calculator. Note that -# we disable all parameters because we want to use all options from our input file - -calc = CP2K( - inp=inp, - max_scf=None, - cutoff=None, - xc=None, - force_eval_method=None, - basis_set=None, - pseudo_potential=None, - basis_set_file=None, - potential_file=None, - stress_tensor=False, - poisson_solver=None, - print_level=None, -) - -# %% -# We now load a new structure, add the calculator and perform the computation. 
- -atoms = ase.io.read("example.xyz") -atoms.set_calculator(calc) -# atoms.get_potential_energy() diff --git a/deactivated/batch-cp2k/reftraj_template.cp2k b/examples/batch-cp2k/reftraj_template.cp2k similarity index 91% rename from deactivated/batch-cp2k/reftraj_template.cp2k rename to examples/batch-cp2k/reftraj_template.cp2k index 8792d232..fe83ba8d 100644 --- a/deactivated/batch-cp2k/reftraj_template.cp2k +++ b/examples/batch-cp2k/reftraj_template.cp2k @@ -52,9 +52,9 @@ &END FORCES &END PRINT &DFT - BASIS_SET_FILE_NAME GTH_BASIS_SETS - BASIS_SET_FILE_NAME BASIS_ADMM - POTENTIAL_FILE_NAME POTENTIAL + BASIS_SET_FILE_NAME ../../parameters/GTH_BASIS_SETS + BASIS_SET_FILE_NAME ../../parameters/BASIS_ADMM + POTENTIAL_FILE_NAME ../../parameters/POTENTIAL &MGRID CUTOFF 400 &END MGRID @@ -97,7 +97,7 @@ &INTERACTION_POTENTIAL POTENTIAL_TYPE TRUNCATED CUTOFF_RADIUS 3.0 - T_C_G_DATA t_c_g.dat + T_C_G_DATA ../../parameters/t_c_g.dat &END &HF_INFO &END HF_INFO @@ -109,7 +109,7 @@ R_CUTOFF 15 LONG_RANGE_CORRECTION TRUE REFERENCE_FUNCTIONAL revPBE0 - PARAMETER_FILE_NAME dftd3.dat + PARAMETER_FILE_NAME ../../parameters/dftd3.dat &END &END &XC_GRID diff --git a/deactivated/batch-cp2k/run_calcs.sh b/examples/batch-cp2k/run_calcs.sh similarity index 78% rename from deactivated/batch-cp2k/run_calcs.sh rename to examples/batch-cp2k/run_calcs.sh index 03c9c390..59a8ff53 100644 --- a/deactivated/batch-cp2k/run_calcs.sh +++ b/examples/batch-cp2k/run_calcs.sh @@ -2,6 +2,6 @@ for i in $(find ./production/ -mindepth 1 -type d); do cd $i - cp2k -i in.cp2k + cp2k.ssmp -i in.cp2k cd - done From 3a073a0c2c8ead4bde227652b2a2f87d86eafb0f Mon Sep 17 00:00:00 2001 From: Guillaume Fraux Date: Tue, 26 Mar 2024 12:34:21 +0100 Subject: [PATCH 3/9] Update examples/batch-cp2k/reference-trajectory.py Co-authored-by: Philip Loche --- examples/batch-cp2k/reference-trajectory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/batch-cp2k/reference-trajectory.py b/examples/batch-cp2k/reference-trajectory.py index 9d713ad4..84fa6cc7 100644 --- a/examples/batch-cp2k/reference-trajectory.py +++ b/examples/batch-cp2k/reference-trajectory.py @@ -157,7 +157,7 @@ def download_parameter(file): path = os.path.join("parameters", file) if not os.path.exists(path): - url = f"https://raw.githubusercontent.com/cp2k/cp2k/master/data/{file}" + url = f"https://raw.githubusercontent.com/cp2k/cp2k/support/v2024.1/data/{file}" response = requests.get(url) response.raise_for_status() with open(path, "wb") as f: From a03b26af477f01684e9751fbb6480fccd5ab9682 Mon Sep 17 00:00:00 2001 From: HannaTuerk <64609057+HannaTuerk@users.noreply.github.com> Date: Wed, 27 Mar 2024 13:09:36 +0100 Subject: [PATCH 4/9] Improve Feature Selection (#47) * stub script sample selection with rascaline * rascaline soap powerspectrum - first part * fps with equisolve * rough draft * update to new name of metatensor * updated documentation and output statements * Sample selection updated to equisolve * Cleanup sample selection with equisolve * clean-up sample selection with equisolve * Updated environment with equisolve * Changed skmatter feat_selection import to please nox * Fixed underline in sample-selection to be long enough for docs --------- Co-authored-by: Joseph Abbott Co-authored-by: hannatuerk --- examples/sample-selection/environment.yml | 1 + examples/sample-selection/sample-selection.py | 229 ++++++++++++++---- 2 files changed, 182 insertions(+), 48 deletions(-) diff --git a/examples/sample-selection/environment.yml 
b/examples/sample-selection/environment.yml index 95bed51e..7e2cf3b9 100644 --- a/examples/sample-selection/environment.yml +++ b/examples/sample-selection/environment.yml @@ -11,3 +11,4 @@ dependencies: - metatensor - rascaline @ git+https://github.com/Luthaf/rascaline@ca957642f512e141c7570e987aadc05c7ac71983 - skmatter + - equisolve @ git+https://github.com/lab-cosmo/equisolve.git@c858bedef4b2799eb445e4c92535ee387224089a diff --git a/examples/sample-selection/sample-selection.py b/examples/sample-selection/sample-selection.py index 34030c64..ad309230 100644 --- a/examples/sample-selection/sample-selection.py +++ b/examples/sample-selection/sample-selection.py @@ -16,12 +16,14 @@ import ase.io import chemiscope +import metatensor import numpy as np +from equisolve.numpy import feature_selection, sample_selection from matplotlib import pyplot as plt -from metatensor import mean_over_samples +from metatensor import sum_over_samples from rascaline import SoapPowerSpectrum from sklearn.decomposition import PCA -from skmatter import feature_selection, sample_selection +from skmatter import feature_selection as skfeat_selection # %% @@ -57,54 +59,151 @@ # Generate a SOAP power spectrum calculator = SoapPowerSpectrum(**hypers) rho2i = calculator.compute(frames) + + # Makes a dense block -rho2i = rho2i.keys_to_samples(["species_center"]).keys_to_properties( - ["species_neighbor_1", "species_neighbor_2"] +atom_soap = rho2i.keys_to_properties(["species_neighbor_1", "species_neighbor_2"]) + +atom_soap_single_block = atom_soap.keys_to_samples(keys_to_move=["species_center"]) + +# print(atom_soap_single_block) +# print(atom_soap_single_block.block(0)) # There is only one block now! + +# Sum over atomic centers to compute structure features +struct_soap = sum_over_samples( + atom_soap_single_block, sample_names=["center", "species_center"] ) -# Averages over atomic centers to compute structure features -rho2i_structure = mean_over_samples(rho2i, sample_names=["center", "species_center"]) -atom_dscrptr = rho2i.block(0).values -struct_dscrptr = rho2i_structure.block(0).values -print("atom feature descriptor shape:", atom_dscrptr.shape) -print("structure feature descriptor shape:", struct_dscrptr.shape) +print("atom feature descriptor shape:", atom_soap.block(0).values.shape) +print( + "atom feature descriptor (all in one block) shape:", + atom_soap_single_block.block(0).values.shape, +) +print("structure feature descriptor shape:", struct_soap.block(0).values.shape) # %% -# Perform structure (i.e. sample) selection -# ----------------------------------------- +# Perform atomic environment (i.e. sample) selection +# --------------------------------------------------- # -# Using FPS and CUR algorithms implemented in scikit-matter, select a subset of -# the structures. skmatter assumes that our descriptor is represented as a 2D -# matrix, with the samples along axis 0 and features along axis 1. +# Using FPS and CUR algorithms, we can perform selection of atomic environments. +# These are implemented in equisolve, which provides a wrapper around +# scikit-matter to allow for interfacing with data stored in the metatensor +# format. # -# For more info on the functions: `skmatter -# `_ +# Suppose we want to select the 10 most diverse environments for each chemical +# species. +# +# First, we can use the `keys_to_properties` operation in metatensor to move the +# neighbour species indices to the properties of the TensorBlocks. 
The resulting +# descriptor will be a TensorMap comprised of three blocks, one for each +# chemical species, where the chemical species indices are solely present in the +# keys. + +print("----Atomic environment selection-----") # Define the number of structures to select using FPS/CUR -n_structures = 25 +n_envs = 25 + +print(atom_soap) +print(atom_soap.block(0)) + +# %% Now let's perform sample selection on the atomic environments. We want to +# select 10 atomic environments for each chemical species. + +# Define the number of structures *per block* to select using FPS +n_envs = 10 # FPS sample selection -struct_fps = sample_selection.FPS(n_to_select=n_structures, initialize="random").fit( - struct_dscrptr +selector_atomic_fps = sample_selection.FPS(n_to_select=n_envs, initialize="random").fit( + atom_soap ) -struct_fps_idxs = struct_fps.selected_idx_ -# CUR sample selection -struct_cur = sample_selection.CUR(n_to_select=n_structures).fit(struct_dscrptr) -struct_cur_idxs = struct_cur.selected_idx_ +# Print the selected envs for each block +print("atomic envs selected with FPS:\n") +for key, block in selector_atomic_fps.support.items(): + print("species_center:", key, "\n(struct_idx, atom_idx)\n", block.samples.values) + +selector_atomic_cur = sample_selection.CUR(n_to_select=n_envs).fit(atom_soap) +# Print the selected envs for each block +print("atomic envs selected with CUR:\n") +for key, block in selector_atomic_cur.support.items(): + print("species_center:", key, "\n(struct_idx, atom_idx)\n", block.samples.values) + + +# %% +# Selecting from a combined pool of atomic environments +# ----------------------------------------------------- +# +# One can also select from a combined pool of atomic environments and +# structures, instead of selecting an equal number of atomic environments for +# each chemical species. In this case, we can move the 'species_center' key to samples +# such that our descriptor is a TensorMap consisting of a single block. Upon +# sample selection, the most diverse atomic environments will be selected, +# regardless of their chemical species. +print("----All atomic environment selection-----") + +print("keys", atom_soap.keys) +print("blocks", atom_soap[0]) +print("samples in first block", atom_soap[0].samples) + +# Using the original SOAP descriptor, move all keys to properties. + + +# Define the number of structures to select using FPS +n_envs = 10 + +# FPS sample selection +selector_atomic_fps = sample_selection.FPS(n_to_select=n_envs, initialize="random").fit( + atom_soap_single_block +) +print( + "atomic envs selected with FPS: \n (struct_idx, atom_idx, species_center) \n", + selector_atomic_fps.support.block(0).samples.values, +) + + +# %% +# Perform structure (i.e. sample) selection with FPS/CUR +# --------------------------------------------------------- +# +# Instead of atomic environments, one can also select diverse structures. We can +# use the `sum_over_samples` operation in metatensor to define features in the +# structural basis instead of the atomic basis. This is done by summing over the +# atomic environments, labeled by the 'center' index in the samples of the +# TensorMap. +# +# Alternatively, one could use the `mean_over_samples` operation, depending on +# the specific inhomogeneity of the size of the structures in the training set. 
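# %%
# As a minimal sketch of that averaged alternative (not used further in this
# example), the call pattern is identical to the summed version above; only the
# reduction operation changes. ``struct_soap_mean`` is just a throwaway name for
# the demonstration.

struct_soap_mean = metatensor.mean_over_samples(
    atom_soap_single_block, sample_names=["center", "species_center"]
)
print("averaged structure descriptor shape:", struct_soap_mean.block(0).values.shape)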
+ +print("----Structure selection-----") + +# Define the number of structures to select *per block* using FPS +n_structures = 10 + +# FPS structure selection +selector_struct_fps = sample_selection.FPS( + n_to_select=n_structures, initialize="random" +).fit(struct_soap) +struct_fps_idxs = selector_struct_fps.support.block(0).samples.values.flatten() + +print("structures selected with FPS:\n", struct_fps_idxs) + +# CUR structure selection +selector_struct_cur = sample_selection.CUR(n_to_select=n_structures).fit(struct_soap) +struct_cur_idxs = selector_struct_cur.support.block(0).samples.values.flatten() +print("structures selected with CUR:\n", struct_cur_idxs) -print("Structure indices obtained with FPS ", struct_fps_idxs) -print("Structure indices obtained with CUR ", struct_cur_idxs) # Slice structure descriptor along axis 0 to contain only the selected structures -struct_dscrptr_fps = struct_dscrptr[struct_fps_idxs, :] -struct_dscrptr_cur = struct_dscrptr[struct_cur_idxs, :] -assert struct_dscrptr_fps.shape == struct_dscrptr_cur.shape +struct_soap_fps = struct_soap.block(0).values[struct_fps_idxs, :] +struct_soap_cur = struct_soap.block(0).values[struct_cur_idxs, :] +assert struct_soap_fps.shape == struct_soap_cur.shape -print("Structure descriptor shape before selection ", struct_dscrptr.shape) -print("Structure descriptor shape after selection ", struct_dscrptr_fps.shape) +print("Structure descriptor shape before selection ", struct_soap.block(0).values.shape) +print("Structure descriptor shape after selection (FPS)", struct_soap_fps.shape) +print("Structure descriptor shape after selection (CUR)", struct_soap_cur.shape) # %% @@ -120,8 +219,8 @@ # Generate a structure PCA -struct_dscrptr_pca = PCA(n_components=2).fit_transform(struct_dscrptr) -assert struct_dscrptr_pca.shape == (n_frames, 2) +struct_soap_pca = PCA(n_components=2).fit_transform(struct_soap.block(0).values) +assert struct_soap_pca.shape == (n_frames, 2) # %% @@ -133,16 +232,10 @@ # Matplotlib plot fig, ax = plt.subplots(1, 1, figsize=(6, 4)) -scatter = ax.scatter(struct_dscrptr_pca[:, 0], struct_dscrptr_pca[:, 1], c="red") -ax.plot( - struct_dscrptr_pca[struct_cur_idxs, 0], - struct_dscrptr_pca[struct_cur_idxs, 1], - "kx", - label="CUR selection", -) +scatter = ax.scatter(struct_soap_pca[:, 0], struct_soap_pca[:, 1], c="red") ax.plot( - struct_dscrptr_pca[struct_fps_idxs, 0], - struct_dscrptr_pca[struct_fps_idxs, 1], + struct_soap_pca[struct_cur_idxs, 0], + struct_soap_pca[struct_cur_idxs, 1], "ko", fillstyle="none", label="FPS selection", @@ -181,13 +274,12 @@ properties.update( { - "PC1": struct_dscrptr_pca[:, 0], - "PC2": struct_dscrptr_pca[:, 1], + "PC1": struct_soap_pca[:, 0], + "PC2": struct_soap_pca[:, 1], "selection": np.array(selection_levels), } ) -print(properties) # Display with chemiscope. This currently does not work - as raised in issue #8 # https://github.com/lab-cosmo/software-cookbook/issues/8 @@ -221,22 +313,63 @@ # Now perform feature selection. In this example we will go back to using the # descriptor decomposed into atomic environments, as opposed to the one # decomposed into structure environments, but only use FPS for brevity. 
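# %%
# For reference, a rough sketch of the CUR analogue that is skipped here for
# brevity: it runs the scikit-matter selector directly on the dense block
# values. ``feat_cur_sketch`` is only a placeholder name for this illustration;
# the example itself carries on with FPS below.

feat_cur_sketch = skfeat_selection.CUR(n_to_select=200).fit(
    atom_soap_single_block.block(0).values
)
print("feature indices selected with CUR:", feat_cur_sketch.selected_idx_)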
+print("----Feature selection-----") # Define the number of features to select n_features = 200 # FPS feature selection feat_fps = feature_selection.FPS(n_to_select=n_features, initialize="random").fit( - atom_dscrptr + atom_soap_single_block +) + +# Slice atomic descriptor along axis 1 to contain only the selected features +# atom_soap_single_block_fps = atom_soap_single_block.block(0).values[:, feat_fps_idxs] +atom_soap_single_block_fps = metatensor.slice( + atom_soap_single_block, + axis="properties", + labels=feat_fps.support.block(0).properties, +) + +print( + "atomic descriptor shape before selection ", + atom_soap_single_block.block(0).values.shape, +) +print( + "atomic descriptor shape after selection ", + atom_soap_single_block_fps.block(0).values.shape, +) + +# %% + +# %% +# Perform feature selection (skmatter) +# ------------------------------------ +# +# Now perform feature selection. In this example we will go back to using the +# descriptor decomposed into atomic environments, as opposed to the one +# decomposed into structure environments, but only use FPS for brevity. + +print("----Feature selection (skmatter)-----") + +# Define the number of features to select +n_features = 200 + +# FPS feature selection +feat_fps = skfeat_selection.FPS(n_to_select=n_features, initialize="random").fit( + atom_soap_single_block.block(0).values ) feat_fps_idxs = feat_fps.selected_idx_ print("Feature indices obtained with FPS ", feat_fps_idxs) # Slice atomic descriptor along axis 1 to contain only the selected features -atom_dscrptr_fps = atom_dscrptr[:, feat_fps_idxs] +atom_dscrptr_fps = atom_soap_single_block.block(0).values[:, feat_fps_idxs] -print("atomic descriptor shape before selection ", atom_dscrptr.shape) +print( + "atomic descriptor shape before selection ", + atom_soap_single_block.block(0).values.shape, +) print("atomic descriptor shape after selection ", atom_dscrptr_fps.shape) # %% From c5fc8a0299275bed43cb59b9ab48e50362b6001a Mon Sep 17 00:00:00 2001 From: Guillaume Fraux Date: Wed, 27 Mar 2024 14:40:41 +0100 Subject: [PATCH 5/9] Update contributing docs (#52) --- CONTRIBUTING.rst | 106 ++++++++++++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 42 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 689534a0..12a1693b 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -1,82 +1,103 @@ Contributing ============ -Contributions are welcome, and they are greatly appreciated! Every little bit helps, and -credit will always be given. You can contribute in the ways listed below. +Contributions are welcome, and they are greatly appreciated! Every little bit +helps, and credit will always be given. You can contribute in the ways listed +below. Requirements for new contributions ---------------------------------- -All code included in this repository is executed in each pull request. This ensures that -the code in this repository stays executable for a longer time frame. Because of that we -do not want to have examples with heavy calculations that take more than 30 seconds to -execute. If heavy calculations are needed, it might be a better option to put your -example in an external repository and link to it on the `Wiki page -`_. If you feel unsure if a -contribution is suitable, feel free to contact one of the `support`_ before. +All code included in this repository is executed in each pull request. This +ensures that the code in this repository stays executable for a longer time +frame. 
Because of that we do not want to have examples with heavy calculations +that take more than 30 to 1 min seconds to execute. If you feel unsure if a +contribution is suitable, feel free to contact one of the `support`_ person +beforehand. Adding a new examples --------------------- -To visualize examples on our readthedocs page we use `sphinx-gallery`. When building the -doc the examples are run and compiled automatically into HTML files and moved to the -documentation folder `docs/src `_. You will find all the examples Python -scripts in the `examples/` folder of the repository. Each example is put into one of the -example category folders, e.g. `examples/sample_selection `_. -If you do not know where to put your example, just put in the `examples/uncategorized -`_ folder and when doing a pull request, we will figure out -where to put it. +The examples in this repository are python files that we render for the website +using `sphinx-gallery`_. In short, these are python files containing comments +formatted as `RestructuredText`_, which are executed, and then the comments, +code and outputs (including plots, ``print`` outputs, etc.) are assembled in a +single HTML webpage. -After adding a file, you'll need to update ``tox.ini`` to build your example when -building the documentation. Look how it's done for the ``lode_linear`` example, and -do the same for yours! +To add a new example, you'll need to create a new folder in example (substitute +```` with the folder name in the instructions below), and add the +following files inside: + +- ``README.rst``, can be empty or can contain a short description of the example; +- ``environment.yml``, a `conda`_ environment file containing the list of + dependencies needed by your example; +- as many Python files as you want, each one will be converted to a separate + HTML page. + +.. _sphinx-gallery: https://sphinx-gallery.github.io/ +.. _RestructuredText: https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html +.. _conda: https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#create-env-file-manually + +Finally, you'll need to add your example to the list so it is automatically +build on CI. The list is in the ``.github/workflows/docs.yml`` file, near the +``example-name:`` section. Converting a Jupyter notebook to a sphinx-gallery compatible Python script -------------------------------------------------------------------------- -Often it is more convenient to work in a Jupyter notebook and convert in later to -sphinx-gallery example. To convert your Jupyter notebook you can just use the -`ipynb-to-gallery.py `_ file that is root folder of the repository +Often it is more convenient to work in a Jupyter notebook and convert in later +to sphinx-gallery example. To convert your Jupyter notebook you can use the +`ipynb-to-gallery.py `_ file that is root folder of the +repository .. code-block:: bash python ipynb-to-gallery.py -Building the cookbook locally ------------------------------ +Running your example and visualizing the HTML +--------------------------------------------- -When you add a new example, you can run the linter (code format checker) and build the -doc to check if your code runs with +We use `nox`_ as a task runner to run all examples and assemble the final +documentation. You can install it with ``pip install nox``. + +To run your example and make sure it conforms to the expected code formatting, +you can use the following commands: .. 
code-block:: bash - tox + # execute the example and render it to HTML + nox -e + + # check the code formatting + nox -e lint + +To visualize the generated cookbook open ``docs/build/html/index.html`` in a web +browser. -If there are formatting errors appearing you can format your file automatically with +If there are formatting errors you can try to fix them automatically with: .. code-block:: bash - tox -e format + nox -e format -That should fix most of the formatting issues automatically. If there are still -formatting issues remaining, then the reviewer of your pull request can fix them. -To visualize the generated cookbook open in a browser the file -``docs/build/html/index.html``. +You can also build all examples (warning, this will take quite some time) with: + +.. code-block:: bash + + nox -e docs + +.. _nox: https://nox.thea.codes/ Known issues ------------ -Sometimes the doc preview from readthedocs is not rendered correctly. If something works -in your local build but not in the readthedocs PR preview. It could that the issue is -fixed once you merge with the main branch. - Chemiscope widgets are not currently integrated into our sphinx gallery. Support ------- -If you still have problems adding your example to the repository, please feel free to -contact one of the people +If you still have problems adding your example to the repository, please feel +free to contact one of the people `@agoscinski (Alexander Goscinski) `_ @@ -85,5 +106,6 @@ contact one of the people Code of Conduct --------------- -Please note that the COSMO cookbook project is released with a `Contributor Code of -Conduct `_. By contributing to this project you agree to abide by its terms. +Please note that the COSMO cookbook project is released with a `Contributor Code +of Conduct `_. By contributing to this project you agree to abide by +its terms. From ebc1b7e600667419e51258268c167a1c61c31fc7 Mon Sep 17 00:00:00 2001 From: Guillaume Fraux Date: Wed, 27 Mar 2024 15:05:05 +0100 Subject: [PATCH 6/9] Infrastructure updates (#46) * Add authors to the different examples * Add download links for environment --- docs/src/index.rst.in | 12 ++++++++++ examples/batch-cp2k/reference-trajectory.py | 3 ++- examples/gaas-map/gaas-map.py | 3 +++ examples/lode-linear/lode-linear.py | 3 +++ examples/roy-gch/roy-gch.py | 2 ++ examples/sample-selection/sample-selection.py | 2 +- generate-gallery.py | 2 +- noxfile.py | 24 +++++++++++++++++++ 8 files changed, 48 insertions(+), 3 deletions(-) diff --git a/docs/src/index.rst.in b/docs/src/index.rst.in index a5137653..f7378cfe 100644 --- a/docs/src/index.rst.in +++ b/docs/src/index.rst.in @@ -6,6 +6,18 @@ COSMO Software Cookbook :end-before: marker-intro-end +All the examples provide an ``environment.yml`` file that you can download and +then use with conda to create a new environment with all the required +dependencies for this example. + +.. code-block:: bash + + # Pick a name for the environment and replace with it + conda env create --name --file environment.yml + + # when you want to use the environment + conda env activate --name + .. toctree:: :caption: Table of Contents :maxdepth: 1 diff --git a/examples/batch-cp2k/reference-trajectory.py b/examples/batch-cp2k/reference-trajectory.py index 84fa6cc7..4fb6e77c 100644 --- a/examples/batch-cp2k/reference-trajectory.py +++ b/examples/batch-cp2k/reference-trajectory.py @@ -2,7 +2,8 @@ Batch run of CP2K calculations ============================== -.. 
start-body +:Authors: Matthias Kellner `@bananenpampe `_, + Philip Loche `@PicoCentauri `_ This is an example how to perform single point calculations based on list of structures using `CP2K `_ using its `reftraj functionality diff --git a/examples/gaas-map/gaas-map.py b/examples/gaas-map/gaas-map.py index 00d181b7..c0175812 100644 --- a/examples/gaas-map/gaas-map.py +++ b/examples/gaas-map/gaas-map.py @@ -2,6 +2,9 @@ PCA/PCovR Visualization for the rattled GaAs training dataset ============================================================= +:Authors: Michele Ceriotti `@ceriottm `_, + Giulio Imbalzano + This example uses ``rascaline`` and ``metatensor`` to compute structural properties for the structures in a training for a ML model. These are then used with simple dimensionality reduction algorithms diff --git a/examples/lode-linear/lode-linear.py b/examples/lode-linear/lode-linear.py index d8d5c551..23ac620b 100644 --- a/examples/lode-linear/lode-linear.py +++ b/examples/lode-linear/lode-linear.py @@ -2,6 +2,9 @@ LODE Tutorial ============= +:Authors: Philip Loche `@PicoCentauri `_, + Kevin Huguenin-Dumittan `@kvhuguenin `_ + This tutorial explains how Long range equivariant descriptors can be constructed using rascaline and the resulting descriptors be used to construct a linear model with equisolve diff --git a/examples/roy-gch/roy-gch.py b/examples/roy-gch/roy-gch.py index 5b59ae97..13ba591b 100644 --- a/examples/roy-gch/roy-gch.py +++ b/examples/roy-gch/roy-gch.py @@ -2,6 +2,8 @@ Generalized Convex Hull construction for the polymorphs of ROY ============================================================== +:Authors: Michele Ceriotti `@ceriottm `_ + This notebook analyzes the structures of 264 polymorphs of ROY, from `Beran et Al, Chemical Science (2022) `__, comparing the diff --git a/examples/sample-selection/sample-selection.py b/examples/sample-selection/sample-selection.py index ad309230..02c3747b 100644 --- a/examples/sample-selection/sample-selection.py +++ b/examples/sample-selection/sample-selection.py @@ -2,7 +2,7 @@ Sample and Feature Selection with FPS and CUR ============================================= -.. start-body +:Authors: Davide Tisi `@DavideTisi `_ In this tutorial we generate descriptors using rascaline, then select a subset of structures using both the farthest-point sampling (FPS) and CUR algorithms diff --git a/generate-gallery.py b/generate-gallery.py index 9d27e630..239a50b9 100644 --- a/generate-gallery.py +++ b/generate-gallery.py @@ -38,7 +38,7 @@ def __init__(self, example): "examples_dirs": os.path.join(HERE, example), "gallery_dirs": gallery_dir, "min_reported_time": 60, - "copyfile_regex": r".*\.(sh|xyz|cp2k)", + "copyfile_regex": r".*\.(sh|xyz|cp2k|yml)", "matplotlib_animations": True, } diff --git a/noxfile.py b/noxfile.py index fac20ca2..83bff979 100644 --- a/noxfile.py +++ b/noxfile.py @@ -128,6 +128,30 @@ def build_docs(session): output.write(f" {path}\n") + # TODO: Explain + with open(file) as fd: + content = fd.read() + + if "Download Conda environment file" in content: + # do not add the download link twice + pass + else: + lines = content.split("\n") + with open(file, "w") as fd: + for line in lines: + if "sphx-glr-download-jupyter" in line: + # add the new download link before + fd.write( + """ + .. 
container:: sphx-glr-download + + :download:`Download Conda environment file: environment.yml ` +""" + ) + + fd.write(line) + fd.write("\n") + session.run("sphinx-build", "-W", "-b", "html", "docs/src", "docs/build/html") From f58fa9cc9dbc7025b191e5b45c9dfe371a9f8a0c Mon Sep 17 00:00:00 2001 From: "Sanggyu \"Raymond\" Chong" <87842409+SanggyuChong@users.noreply.github.com> Date: Thu, 28 Mar 2024 08:25:00 +0100 Subject: [PATCH 7/9] Adding the LPR example to the software cookbook (#48) --- .github/workflows/docs.yml | 1 + examples/lpr/README.rst | 10 ++ examples/lpr/environment.yml | 12 ++ examples/lpr/lpr.py | 263 +++++++++++++++++++++++++++++++++++ noxfile.py | 1 + 5 files changed, 287 insertions(+) create mode 100644 examples/lpr/README.rst create mode 100644 examples/lpr/environment.yml create mode 100644 examples/lpr/lpr.py diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index a90e7fa8..7db59205 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -20,6 +20,7 @@ jobs: - sample-selection - gaas-map - batch-cp2k + - lpr steps: - uses: actions/checkout@v4 diff --git a/examples/lpr/README.rst b/examples/lpr/README.rst new file mode 100644 index 00000000..dde03dd7 --- /dev/null +++ b/examples/lpr/README.rst @@ -0,0 +1,10 @@ +Local Prediction Rigidity (LPR) +=============================== + +This is an example of how one can calculate the local prediction rigidity +for the atoms of "test" set structures, given two differently composed +"training" set structures. + +It uses ``rascaline`` to compute descriptors for a database of atomic +structures, and ``scikit-matter`` to compute the LPR. The results are +visualized using ``chemiscope`` widgets. diff --git a/examples/lpr/environment.yml b/examples/lpr/environment.yml new file mode 100644 index 00000000..5dcfcf88 --- /dev/null +++ b/examples/lpr/environment.yml @@ -0,0 +1,12 @@ +channels: + - conda-forge +dependencies: + - python=3.11 + - pip + - rust + - pip: + - ase + - chemiscope + - matplotlib + - rascaline @ git+https://github.com/Luthaf/rascaline@5c2a79838bda0a52d0fde2fbe65941f4792c4cae + - skmatter diff --git a/examples/lpr/lpr.py b/examples/lpr/lpr.py new file mode 100644 index 00000000..f5017655 --- /dev/null +++ b/examples/lpr/lpr.py @@ -0,0 +1,263 @@ +""" +LPR analysis for amorphous silicon dataset +========================================== + +:Authors: Sanggyu "Raymond" Chong `@SanggyuChong `_, + Federico Grasselli `@fgrassel `_ + +In this tutorial, we calculate the SOAP descriptors of an amorphous +silicon dataset using rascaline, then compute the local prediction +rigidity (LPR) for the atoms of a "test" set before and after +modifications to the "training" dataset has been made. + +First, we import all the necessary packages: +""" + +# %% +import os +import tarfile + +import numpy as np +import requests +from ase.io import read +from matplotlib import pyplot as plt +from matplotlib.colors import LogNorm +from rascaline import SoapPowerSpectrum +from sklearn.decomposition import PCA +from skmatter.metrics import local_prediction_rigidity as lpr + + +# %% +# Load and prepare amorphous silicon data +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# +# We first download the dataset associated with LPR +# analysis from Materials Cloud and load the the amorphous +# silicon structures using `ASE `_. 
+ +filename = "LPR_supp_notebook_dataset.tar.gz" +if not os.path.exists(filename): + url = "https://rb.gy/wxsrug" # shortened URL + response = requests.get(url) + response.raise_for_status() + with open(filename, "wb") as f: + f.write(response.content) + +with tarfile.open(filename) as tar: + tar.extractall(path=".") + +frames_pristine = read("datasets/Si_amo_defect_free.xyz", ":") +frames_defect = read("datasets/Si_amo_defect_containing.xyz", ":") + +# Randomly shuffle the structures + +np.random.seed(20230215) + +ids = list(range(len(frames_pristine))) +np.random.shuffle(ids) +frames_pristine = [frames_pristine[ii] for ii in ids] + +ids = list(range(len(frames_defect))) +np.random.shuffle(ids) +frames_defect = [frames_defect[ii] for ii in ids] + +# %% +# We now further refine the loaded datasets according the the +# number of coordinated atoms that each atomic environment exhibits. +# "Pristine" refers to structures where all of the atoms have strictly +# 4 coordinating atoms. "Defect" refers to structures that contain +# atoms with coordination numbers other than 4. +# +# We use :code:`get_all_distances` funciton of :code:`ase.Atoms` to detect the +# number of coordinated atoms. + +cur_cutoff = 2.7 +refined_pristine_frames = [] +for frame in frames_pristine: + neighs = (frame.get_all_distances(mic=True) < cur_cutoff).sum(axis=0) - 1 + if neighs.max() > 4 or neighs.min() < 4: + continue + else: + refined_pristine_frames.append(frame) + +refined_defect_frames = [] +for frame in frames_defect: + neighs = (frame.get_all_distances(mic=True) < cur_cutoff).sum(axis=0) - 1 + num_defects = (neighs > 4).sum() + (neighs < 4).sum() + if num_defects > 4: + refined_defect_frames.append(frame) + + +# %% +# Compute SOAP descriptors using rascaline +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# Now, we move on and compute the SOAP descriptors for the refined +# structures. First, define the rascaline hyperparameters used to +# compute SOAP. Among the hypers, notice that the cutoff is chosen +# to be 2.85 Å, and the radial scaling is turned off. These were +# heuristic choices made to accentuate the difference in the LPR +# based on the nearest-neighbor coordination. (Do not blindly +# use this set of hypers for production-quality model training!) + +# Hypers dictionary +hypers = { + "cutoff": 2.85, + "max_radial": 10, + "max_angular": 12, + "atomic_gaussian_width": 0.5, + "center_atom_weight": 1.0, + "radial_basis": {"Gto": {"spline_accuracy": 1e-8}}, + "cutoff_function": {"ShiftedCosine": {"width": 0.1}}, + "radial_scaling": None, +} +# Define rascaline calculator +calculator = SoapPowerSpectrum(**hypers) + +# Calculate the SOAP power spectrum +Xlist_pristine = [] +for frame in refined_pristine_frames: + descriptor = calculator.compute(frame) + Xlist_pristine.append(np.array(descriptor.block().values)) + +Xlist_defect = [] +for frame in refined_defect_frames: + descriptor = calculator.compute(frame) + Xlist_defect.append(np.array(descriptor.block().values)) + +# %% +# Organize structures into "training" and "test" sets +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# Now we move on and compute the SOAP descriptors for the refined +# structures. First, define the rascaline hyperparameters used to +# compute SOAP. +# +# Notice that the format in which we handle the descriptors is as a +# list of :code:`np.array` descriptor blocks. This is to ensure +# compatibility with how things have been implemented in the LPR +# module of :code:`scikit-matter`. 
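# %%
# To make that layout concrete: each entry of the lists built above is the
# ``(n_atoms_in_structure, n_features)`` array of one structure. The short,
# purely illustrative check below (``n_feats`` is just a temporary name) prints
# the dimensions that the LPR routine will see.

n_feats = Xlist_pristine[0].shape[1]
assert all(X.ndim == 2 and X.shape[1] == n_feats for X in Xlist_pristine)
print("number of pristine structures:", len(Xlist_pristine))
print("features per atomic environment:", n_feats)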
+ +n_train = 400 +n_add = 50 +n_test = 50 + +X_pristine = [Xlist for Xlist in Xlist_pristine[: n_train + n_add]] +X_defect = [Xlist for Xlist in Xlist_defect[:n_add]] +X_test = [Xlist for Xlist in Xlist_defect[n_add : n_add + n_test]] + +# Save coordination values for visualization +test_coord = [] +for frame in refined_defect_frames[n_add : n_add + n_test]: + coord = (frame.get_all_distances(mic=True) < cur_cutoff - 0.05).sum(axis=0) - 1 + test_coord += coord.tolist() +test_coord = np.array(test_coord) + +# %% +# Compute the LPR for the test set +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# Next, we will use the :code:`local_prediction_rigidity` module of +# `scikit-matter `_ +# to compute the LPRs for the test set that we have set apart. +# +# LPR reflects how the ML model perceives a local environment, +# given a collection of other structures, similar or different. +# It should then carry over some of the details involved in training +# the model, in this case the regularization strength. +# +# For this example, we have foregone on the actual model training, +# and so we define an arbitrary value for the alpha. + +alpha = 1e-4 +LPR_test, rank = lpr(X_pristine, X_test, alpha) +LPR_test = np.hstack(LPR_test) + +# %% +# Visualizing the LPR on a PCA map +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# We now visualize the LPRs of the test set on a PCA map, +# where the PCA is performed on the SOAP descriptors of +# defect-containing dataset. + +pca = PCA(n_components=5) +descriptors_all = calculator.compute(refined_defect_frames) +pca.fit_transform(descriptors_all.block().values) +PCA_test = pca.transform(np.vstack(X_test)) + +rmin = np.log10(LPR_test.min()) + 0.5 +rmax = np.log10(LPR_test.max()) - 0.5 + +fig = plt.figure(figsize=(5, 4), dpi=200) +ax = fig.add_subplot() +im = ax.scatter( + PCA_test[:, 0], + PCA_test[:, 1], + c=LPR_test, + s=20, + linewidths=0, + norm=LogNorm(vmin=10**rmin, vmax=10**rmax), + cmap="viridis", +) + +ax.set_xlabel("PC1") +ax.set_ylabel("PC2") +fig.colorbar(im, ax=ax, label="LPR") +ax.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False) + +# %% +# In the PCA map, where each point corresponds to an +# atomic environment of the test set structures, one +# can observe 4 different clusters of points, arranged +# along PC1. This corresponds to the coordination numbers +# ranging from 3 to 6. Since the training set contains +# structures exclusively composed of 4-coordinated atoms, +# LPR is distinctly high for the second, main cluster of +# points, and quite low for the three other clusters. + + +# %% +# Studying the LPR after dataset modification +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# We now want to see what would happen when defect structures +# are included into the training set of the model. For this, +# we first create a modified dataset that incorporates in the +# defect structures, and recompute the LPR. + +X_new = X_pristine[:n_train] + X_defect[:n_add] +LPR_test_new, rank = lpr(X_new, X_test, alpha) +LPR_test_new = np.hstack(LPR_test_new) + +# %% +# We then visualize the change in the LPR with the +# modification of the dataset by plotting the same PCA +# map, but now colored by the ratio of new set of LPR +# values (after dataset modification) over the original +# one. 
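# %%
# Before plotting, a compact way to summarise the same effect is the median LPR
# ratio per coordination number. This is purely illustrative and reuses the
# arrays computed above; it assumes ``test_coord`` is ordered consistently with
# ``LPR_test`` (both follow the atom order of the test frames).

for c in np.unique(test_coord):
    mask = test_coord == c
    ratio = np.median(LPR_test_new[mask] / LPR_test[mask])
    print(f"coordination {int(c)}: median LPR ratio = {ratio:.2f}")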
+ +fig = plt.figure(figsize=(5, 4), dpi=200) +ax = fig.add_subplot() +im = ax.scatter( + PCA_test[:, 0], + PCA_test[:, 1], + c=LPR_test_new / LPR_test, + s=20, + linewidths=0, + # norm=LogNorm(vmin=10**rmin, vmax=10**rmax), + cmap="OrRd", +) +ax.set_xlabel("PC1") +ax.set_ylabel("PC2") +fig.colorbar(im, ax=ax, label=r"LPR$_{\mathrm{new}}$ / LPR$_{\mathrm{old}}$") +ax.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False) + +# %% +# It is apparent that while the LPR stays more or less consistent for the +# 4-coordinated atoms, it is significantly enhanced for the defective environments +# as a result of the inclusion of defective structures in the training set. + + +# %% diff --git a/noxfile.py b/noxfile.py index 83bff979..881a13f2 100644 --- a/noxfile.py +++ b/noxfile.py @@ -175,6 +175,7 @@ def lint(session): "flake8", "--max-line-length=88", "--exclude=docs/src/examples/", + "--extend-ignore=E203", *LINT_FILES, ) From 1c15ffdb588d18826cf3319471a36e0bfe711ef4 Mon Sep 17 00:00:00 2001 From: Davide Tisi Date: Thu, 28 Mar 2024 15:51:36 +0100 Subject: [PATCH 8/9] remove comments and import (#53) remove comments and import --- examples/sample-selection/sample-selection.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/examples/sample-selection/sample-selection.py b/examples/sample-selection/sample-selection.py index 02c3747b..75d8abea 100644 --- a/examples/sample-selection/sample-selection.py +++ b/examples/sample-selection/sample-selection.py @@ -20,7 +20,6 @@ import numpy as np from equisolve.numpy import feature_selection, sample_selection from matplotlib import pyplot as plt -from metatensor import sum_over_samples from rascaline import SoapPowerSpectrum from sklearn.decomposition import PCA from skmatter import feature_selection as skfeat_selection @@ -66,11 +65,8 @@ atom_soap_single_block = atom_soap.keys_to_samples(keys_to_move=["species_center"]) -# print(atom_soap_single_block) -# print(atom_soap_single_block.block(0)) # There is only one block now! 
- # Sum over atomic centers to compute structure features -struct_soap = sum_over_samples( +struct_soap = metatensor.sum_over_samples( atom_soap_single_block, sample_names=["center", "species_center"] ) From c2dcd2f6f0f24540e3e18e96b753b7f2552a6372 Mon Sep 17 00:00:00 2001 From: Guillaume Fraux Date: Fri, 5 Apr 2024 15:07:56 +0200 Subject: [PATCH 9/9] Small infrastructure fixes (#56) * Require a recent nox * Pull rust >= 1.65 when using it for rascaline --- examples/gaas-map/environment.yml | 2 +- examples/lode-linear/environment.yml | 2 +- examples/lpr/environment.yml | 2 +- examples/roy-gch/environment.yml | 2 +- examples/sample-selection/environment.yml | 2 +- noxfile.py | 1 + 6 files changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/gaas-map/environment.yml b/examples/gaas-map/environment.yml index 32dfa760..27afd282 100644 --- a/examples/gaas-map/environment.yml +++ b/examples/gaas-map/environment.yml @@ -3,7 +3,7 @@ channels: dependencies: - python=3.11 - pip - - rust + - rust >=1.65 - pip: - ase - chemiscope diff --git a/examples/lode-linear/environment.yml b/examples/lode-linear/environment.yml index 2a66a8ba..74144273 100644 --- a/examples/lode-linear/environment.yml +++ b/examples/lode-linear/environment.yml @@ -3,7 +3,7 @@ channels: dependencies: - python=3.11 - pip - - rust + - rust >=1.65 - pip: - ase - equisolve @ git+https://github.com/lab-cosmo/equisolve.git@c858bedef4b2799eb445e4c92535ee387224089a diff --git a/examples/lpr/environment.yml b/examples/lpr/environment.yml index 5dcfcf88..877dff7c 100644 --- a/examples/lpr/environment.yml +++ b/examples/lpr/environment.yml @@ -3,7 +3,7 @@ channels: dependencies: - python=3.11 - pip - - rust + - rust >=1.65 - pip: - ase - chemiscope diff --git a/examples/roy-gch/environment.yml b/examples/roy-gch/environment.yml index 95bed51e..fe3f1a41 100644 --- a/examples/roy-gch/environment.yml +++ b/examples/roy-gch/environment.yml @@ -3,7 +3,7 @@ channels: dependencies: - python=3.11 - pip - - rust + - rust >=1.65 - pip: - ase - chemiscope diff --git a/examples/sample-selection/environment.yml b/examples/sample-selection/environment.yml index 7e2cf3b9..48eabd61 100644 --- a/examples/sample-selection/environment.yml +++ b/examples/sample-selection/environment.yml @@ -3,7 +3,7 @@ channels: dependencies: - python=3.11 - pip - - rust + - rust >=1.65 - pip: - ase - chemiscope diff --git a/noxfile.py b/noxfile.py index 881a13f2..a6779de4 100644 --- a/noxfile.py +++ b/noxfile.py @@ -7,6 +7,7 @@ # global nox options +nox.needs_version = ">=2024" nox.options.reuse_venv = "yes" nox.options.sessions = ["lint", "docs"]