diff --git a/configure b/configure index 8f28351035..2eabb2f4db 100755 --- a/configure +++ b/configure @@ -751,6 +751,7 @@ enable_af_ocl enable_af_cuda enable_af_cpu enable_libtorch +enable_metatensor enable_openmp ' ac_precious_vars='build_alias @@ -1452,6 +1453,7 @@ Optional Features: --enable-af_cuda enable search for arrayfire_cuda, default: no --enable-af_cpu enable search for arrayfire_cpu, default: no --enable-libtorch enable search for libtorch, default: no + --enable-metatensor enable search for metatensor, default: no --disable-openmp do not use OpenMP Some influential environment variables: @@ -3220,6 +3222,24 @@ fi #added by luigibonati +metatensor= +# Check whether --enable-metatensor was given. +if test "${enable_metatensor+set}" = set; then : + enableval=$enable_metatensor; case "${enableval}" in + (yes) metatensor=true ;; + (no) metatensor=false ;; + (*) as_fn_error $? "wrong argument to --enable-metatensor" "$LINENO" 5 ;; + esac +else + case "no" in + (yes) metatensor=true ;; + (no) metatensor=false ;; + esac + +fi + + + @@ -9635,6 +9655,11 @@ $as_echo "$as_me: WARNING: cannot enable __PLUMED_HAS_ARRAYFIRE" >&2;} fi +# metatensor requires libtorch +if test $metatensor = true ; then + libtorch=true; +fi + #added by luigibonati if test $libtorch = true ; then # disable as-needed in linking libraries (both static and shared) @@ -9955,6 +9980,182 @@ $as_echo "$as_me: WARNING: cannot enable __PLUMED_HAS_LIBTORCH" >&2;} fi fi +if test $metatensor = true ; then + # find metatensor and metatensor_torch + + found=ko + __PLUMED_HAS_METATENSOR=no + if test "${libsearch}" = true ; then + testlibs="metatensor metatensor_torch" + else + testlibs="" + fi + save_LIBS="$LIBS" + + # check if multiple libraries are required simultaneously + multiple="no" + if test "true" = "true"; then + multiple="yes" + all_LIBS="" + for testlib in $testlibs; + do + all_LIBS="$all_LIBS -l$testlib" + done + testlibs=" " # to check only without libraries, and later with all together + fi + + # check without libraries + { $as_echo "$as_me:${as_lineno-$LINENO}: checking metatensor without extra libs" >&5 +$as_echo_n "checking metatensor without extra libs... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + // torch header creates a lot a pedantic warnings, which we can't do anything about + // we disable them to make finding the relevant part in the config.log easier + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wpedantic" + #pragma GCC diagnostic ignored "-Wunused-parameter" + #pragma GCC diagnostic ignored "-Wfloat-equal" + #pragma GCC diagnostic ignored "-Wfloat-conversion" + #pragma GCC diagnostic ignored "-Wimplicit-float-conversion" + #pragma GCC diagnostic ignored "-Wimplicit-int-conversion" + #pragma GCC diagnostic ignored "-Wshorten-64-to-32" + #pragma GCC diagnostic ignored "-Wsign-conversion" + #pragma GCC diagnostic ignored "-Wold-style-cast" + #include + #include + #pragma GCC diagnostic pop + #include + #if METATENSOR_TORCH_VERSION_MAJOR != 0 || METATENSOR_TORCH_VERSION_MINOR != 5 + #error "this code is only compatible with metatensor-torch >=0.5.1,<0.6" + #endif + int main() { + metatensor_torch::version(); + return 0; + } + +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + found=ok + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + + if test "$found" = "ko" ; then + if test "$multiple" = "yes" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking metatensor with $all_LIBS" >&5 +$as_echo_n "checking metatensor with $all_LIBS... " >&6; } + LIBS="$all_LIBS $LIBS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + // torch header creates a lot a pedantic warnings, which we can't do anything about + // we disable them to make finding the relevant part in the config.log easier + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wpedantic" + #pragma GCC diagnostic ignored "-Wunused-parameter" + #pragma GCC diagnostic ignored "-Wfloat-equal" + #pragma GCC diagnostic ignored "-Wfloat-conversion" + #pragma GCC diagnostic ignored "-Wimplicit-float-conversion" + #pragma GCC diagnostic ignored "-Wimplicit-int-conversion" + #pragma GCC diagnostic ignored "-Wshorten-64-to-32" + #pragma GCC diagnostic ignored "-Wsign-conversion" + #pragma GCC diagnostic ignored "-Wold-style-cast" + #include + #include + #pragma GCC diagnostic pop + #include + #if METATENSOR_TORCH_VERSION_MAJOR != 0 || METATENSOR_TORCH_VERSION_MINOR != 5 + #error "this code is only compatible with metatensor-torch >=0.5.1,<0.6" + #endif + int main() { + metatensor_torch::version(); + return 0; + } + +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + found=ok + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + else + for testlib in $testlibs + do + { $as_echo "$as_me:${as_lineno-$LINENO}: checking metatensor with -l$testlib" >&5 +$as_echo_n "checking metatensor with -l$testlib... " >&6; } + LIBS="-l$testlib $LIBS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + // torch header creates a lot a pedantic warnings, which we can't do anything about + // we disable them to make finding the relevant part in the config.log easier + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wpedantic" + #pragma GCC diagnostic ignored "-Wunused-parameter" + #pragma GCC diagnostic ignored "-Wfloat-equal" + #pragma GCC diagnostic ignored "-Wfloat-conversion" + #pragma GCC diagnostic ignored "-Wimplicit-float-conversion" + #pragma GCC diagnostic ignored "-Wimplicit-int-conversion" + #pragma GCC diagnostic ignored "-Wshorten-64-to-32" + #pragma GCC diagnostic ignored "-Wsign-conversion" + #pragma GCC diagnostic ignored "-Wold-style-cast" + #include + #include + #pragma GCC diagnostic pop + #include + #if METATENSOR_TORCH_VERSION_MAJOR != 0 || METATENSOR_TORCH_VERSION_MINOR != 5 + #error "this code is only compatible with metatensor-torch >=0.5.1,<0.6" + #endif + int main() { + metatensor_torch::version(); + return 0; + } + +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + found=ok + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test $found = ok ; then + break + fi + LIBS="$save_LIBS" + done + fi + fi + + if test $found = ok ; then + $as_echo "#define __PLUMED_HAS_METATENSOR 1" >>confdefs.h + + __PLUMED_HAS_METATENSOR=yes + else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cannot enable __PLUMED_HAS_METATENSOR" >&5 +$as_echo "$as_me: WARNING: cannot enable __PLUMED_HAS_METATENSOR" >&2;} + LIBS="$save_LIBS" + fi + +fi + # in non-debug mode, add -DNDEBUG if test "$debug" = false ; then { $as_echo "$as_me:${as_lineno-$LINENO}: Release mode, adding -DNDEBUG" >&5 diff --git a/configure.ac b/configure.ac index debe3346d0..fd035c6c63 100644 --- a/configure.ac +++ b/configure.ac @@ -323,6 +323,7 @@ PLUMED_CONFIG_ENABLE([af_ocl],[search for arrayfire_ocl],[no]) PLUMED_CONFIG_ENABLE([af_cuda],[search for arrayfire_cuda],[no]) PLUMED_CONFIG_ENABLE([af_cpu],[search for arrayfire_cpu],[no]) PLUMED_CONFIG_ENABLE([libtorch],[search for libtorch],[no]) #added by luigibonati +PLUMED_CONFIG_ENABLE([metatensor],[search for metatensor],[no]) AC_ARG_VAR(SOEXT,[extension of dynamic libraries (so/dylib)]) AC_ARG_VAR(STATIC_LIBS,[variables that should be linked statically directly to MD code - configure will add here -ldl if necessary ]) @@ -926,6 +927,11 @@ if test "$af_cpu" = true ; then PLUMED_CHECK_PACKAGE([arrayfire.h],[af_is_double],[__PLUMED_HAS_ARRAYFIRE],[afcpu]) fi +# metatensor requires libtorch +if test $metatensor = true ; then + libtorch=true; +fi + #added by luigibonati if test $libtorch = true ; then # disable as-needed in linking libraries (both static and shared) @@ -963,6 +969,35 @@ if test $libtorch = true ; then fi fi +if test $metatensor = true ; then + # find metatensor and metatensor_torch + PLUMED_CHECK_CXX_PACKAGE([metatensor],[ + // torch header creates a lot a pedantic warnings, which we can't do anything about + // we disable them to make finding the relevant part in the config.log easier + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wpedantic" + #pragma GCC diagnostic ignored "-Wunused-parameter" + #pragma GCC diagnostic ignored "-Wfloat-equal" + #pragma GCC diagnostic ignored "-Wfloat-conversion" + #pragma GCC diagnostic ignored "-Wimplicit-float-conversion" + #pragma GCC diagnostic ignored 
"-Wimplicit-int-conversion" + #pragma GCC diagnostic ignored "-Wshorten-64-to-32" + #pragma GCC diagnostic ignored "-Wsign-conversion" + #pragma GCC diagnostic ignored "-Wold-style-cast" + #include + #include + #pragma GCC diagnostic pop + #include + #if METATENSOR_TORCH_VERSION_MAJOR != 0 || METATENSOR_TORCH_VERSION_MINOR != 5 + #error "this code is only compatible with metatensor-torch >=0.5.1,<0.6" + #endif + int main() { + metatensor_torch::version(); + return 0; + } + ], [__PLUMED_HAS_METATENSOR], [metatensor metatensor_torch], [true]) +fi + # in non-debug mode, add -DNDEBUG if test "$debug" = false ; then AC_MSG_NOTICE([Release mode, adding -DNDEBUG]) diff --git a/regtest/.gitignore b/regtest/.gitignore index 192ad04ae4..256848674e 100644 --- a/regtest/.gitignore +++ b/regtest/.gitignore @@ -42,6 +42,7 @@ !/clusters !/unittest !/wham +!/metatensor # These files we just want to ignore completely tmp report.txt diff --git a/regtest/metatensor/rt-basic/Makefile b/regtest/metatensor/rt-basic/Makefile new file mode 100644 index 0000000000..3703b27cea --- /dev/null +++ b/regtest/metatensor/rt-basic/Makefile @@ -0,0 +1 @@ +include ../../scripts/test.make diff --git a/regtest/metatensor/rt-basic/config b/regtest/metatensor/rt-basic/config new file mode 100644 index 0000000000..acec0ad7d9 --- /dev/null +++ b/regtest/metatensor/rt-basic/config @@ -0,0 +1,8 @@ +plumed_modules=metatensor +plumed_needs=metatensor +type=driver + +# NOTE: to enable --debug-forces, also change the dtype of the models to float64 +arg="--plumed plumed.dat --ixyz structure.xyz --length-units A --dump-forces forces --dump-forces-fmt %8.2f" # --debug-forces forces.num" + +PLUMED_ALLOW_SKIP_ON_TRAVIS=yes diff --git a/regtest/metatensor/rt-basic/cv.py b/regtest/metatensor/rt-basic/cv.py new file mode 100644 index 0000000000..192343d375 --- /dev/null +++ b/regtest/metatensor/rt-basic/cv.py @@ -0,0 +1,165 @@ +from typing import Dict, List, Optional + +import torch +from metatensor.torch import Labels, TensorBlock, TensorMap +from metatensor.torch.atomistic import ( + MetatensorAtomisticModel, + ModelCapabilities, + ModelMetadata, + ModelOutput, + NeighborListOptions, + System, +) + + +class TestCollectiveVariable(torch.nn.Module): + r""" + This class computes a simple CV which is then used to test metatensor integration + with PLUMED. + + The per-atom CV is defined as a sum over all pairs for an atom: + + CV^1_i = \sum_j 1/r_ij + CV^2_i = \sum_j 1/r_ij^2 + + The global CV is a sum over all atoms of the per-atom CV: + + CV^1 = \sum_i CV^1_i + CV^2 = \sum_i CV^2_i + + If ``multiple_properties=True``, a only CV^1 is returned, otherwise both CV^1 and + CV^2 are returned. 
+ """ + + def __init__(self, cutoff, multiple_properties): + super().__init__() + + self._nl_request = NeighborListOptions(cutoff=cutoff, full_list=True) + self._multiple_properties = multiple_properties + + def forward( + self, + systems: List[System], + outputs: Dict[str, ModelOutput], + selected_atoms: Optional[Labels], + ) -> Dict[str, TensorMap]: + + if "plumed::cv" not in outputs: + return {} + + device = torch.device("cpu") + if len(systems) > 0: + device = systems[0].positions.device + + if selected_atoms is not None: + raise ValueError("selected atoms is not supported") + + output = outputs["plumed::cv"] + + if output.per_atom: + samples_list: List[List[int]] = [] + for s, system in enumerate(systems): + for i in range(len(system)): + samples_list.append([s, i]) + + sample_values = torch.tensor(samples_list, device=device, dtype=torch.int32) + samples = Labels( + ["system", "atom"], + sample_values.reshape(-1, 2), + ) + else: + samples = Labels( + "system", torch.arange(len(systems), device=device).reshape(-1, 1) + ) + + if self._multiple_properties: + properties = Labels("cv", torch.tensor([[0], [1]], device=device)) + else: + properties = Labels("cv", torch.tensor([[0]], device=device)) + + values = torch.zeros( + (len(samples), len(properties)), dtype=torch.float32, device=device + ) + system_start = 0 + for system_i, system in enumerate(systems): + system_stop = system_start + len(system) + + neighbors = system.get_neighbor_list(self._nl_request) + + atom_index = neighbors.samples.column("first_atom") + distances = torch.linalg.vector_norm(neighbors.values.reshape(-1, 3), dim=1) + inv_dist = 1.0 / distances + + if distances.shape[0] != 0: + if output.per_atom: + sliced = values[system_start:system_stop, 0] + sliced += sliced.index_add(0, atom_index, inv_dist) + else: + values[system_i, 0] += inv_dist.sum() + + if self._multiple_properties: + if output.per_atom: + sliced = values[system_start:system_stop, 1] + sliced += sliced.index_add(0, atom_index, inv_dist**2) + else: + values[system_i, 1] += inv_dist.sum() ** 2 + + system_start = system_stop + + block = TensorBlock( + values=values, + samples=samples, + components=[], + properties=properties, + ) + cv = TensorMap( + keys=Labels("_", torch.tensor([[0]], device=device)), + blocks=[block], + ) + + return {"plumed::cv": cv} + + def requested_neighbor_lists(self) -> List[NeighborListOptions]: + return [self._nl_request] + + +CUTOFF = 3.5 + +capabilities = ModelCapabilities( + outputs={"plumed::cv": ModelOutput(per_atom=True)}, + interaction_range=CUTOFF, + supported_devices=["cpu", "mps", "cuda"], + length_unit="A", + atomic_types=[6], + dtype="float32", +) + +# export all variations of the model +cv = TestCollectiveVariable(cutoff=CUTOFF, multiple_properties=False) +cv.eval() +model = MetatensorAtomisticModel(cv, ModelMetadata(), capabilities) +model.export("scalar-per-atom.pt") + +cv = TestCollectiveVariable(cutoff=CUTOFF, multiple_properties=True) +cv.eval() +model = MetatensorAtomisticModel(cv, ModelMetadata(), capabilities) +model.export("vector-per-atom.pt") + +capabilities = ModelCapabilities( + outputs={"plumed::cv": ModelOutput(per_atom=False)}, + interaction_range=CUTOFF, + supported_devices=["cpu", "mps", "cuda"], + length_unit="A", + atomic_types=[6], + dtype="float32", +) + +cv = TestCollectiveVariable(cutoff=CUTOFF, multiple_properties=False) +cv.eval() +model = MetatensorAtomisticModel(cv, ModelMetadata(), capabilities) +model.export("scalar-global.pt") + +cv = TestCollectiveVariable(cutoff=CUTOFF, 
multiple_properties=True) +cv.eval() +model = MetatensorAtomisticModel(cv, ModelMetadata(), capabilities) +model.export("vector-global.pt") diff --git a/regtest/metatensor/rt-basic/forces.reference b/regtest/metatensor/rt-basic/forces.reference new file mode 100644 index 0000000000..92e9575e70 --- /dev/null +++ b/regtest/metatensor/rt-basic/forces.reference @@ -0,0 +1,11 @@ +9 +-4809.71 -5697.44 -5071.73 +X 147.68 -79.47 -178.64 +X -174.52 -55.59 124.40 +X -216.07 78.68 152.02 +X -425.43 203.97 277.68 +X 71.52 -44.53 35.34 +X 231.25 -151.21 -440.05 +X 141.26 -55.91 -67.49 +X 208.93 -18.51 211.77 +X 15.37 122.58 -115.02 diff --git a/regtest/metatensor/rt-basic/plumed.dat b/regtest/metatensor/rt-basic/plumed.dat new file mode 100644 index 0000000000..bb0177dad8 --- /dev/null +++ b/regtest/metatensor/rt-basic/plumed.dat @@ -0,0 +1,64 @@ +scalar_global: METATENSOR ... + MODEL=scalar-global.pt + DEVICE=cpu + # DEVICE=cuda + # DEVICE=mps + + SPECIES1=1-9 + SPECIES_TO_TYPES=6 +... + +PRINT ARG=scalar_global FILE=scalar_global FMT=%8.2f + + +scalar_per_atom: METATENSOR ... + MODEL=scalar-per-atom.pt + DEVICE=cpu + # DEVICE=cuda + # DEVICE=mps + + SPECIES1=1-9 + SPECIES_TO_TYPES=6 +... + +PRINT ARG=scalar_per_atom FILE=scalar_per_atom FMT=%8.2f + + +vector_global: METATENSOR ... + MODEL=vector-global.pt + DEVICE=cpu + # DEVICE=cuda + # DEVICE=mps + + SPECIES1=1-9 + SPECIES_TO_TYPES=6 +... + +PRINT ARG=vector_global FILE=vector_global FMT=%8.2f + + +vector_per_atom: METATENSOR ... + MODEL=vector-per-atom.pt + DEVICE=cpu + # DEVICE=cuda + # DEVICE=mps + + SPECIES1=1-9 + SPECIES_TO_TYPES=6 +... + +PRINT ARG=vector_per_atom FILE=vector_per_atom FMT=%8.2f + + +scalar_per_atom_sum: SUM ARG=scalar_per_atom PERIODIC=NO +vector_global_sum: SUM ARG=vector_global PERIODIC=NO +vector_per_atom_sum: SUM ARG=vector_per_atom PERIODIC=NO + +summed: CUSTOM ... + ARG=scalar_global,scalar_per_atom_sum,vector_global_sum,vector_per_atom_sum + VAR=x,y,z,t + FUNC=x+y+z+t + PERIODIC=NO +... + +BIASVALUE ARG=summed diff --git a/regtest/metatensor/rt-basic/scalar-global.pt b/regtest/metatensor/rt-basic/scalar-global.pt new file mode 100644 index 0000000000..6009bf96b2 Binary files /dev/null and b/regtest/metatensor/rt-basic/scalar-global.pt differ diff --git a/regtest/metatensor/rt-basic/scalar-per-atom.pt b/regtest/metatensor/rt-basic/scalar-per-atom.pt new file mode 100644 index 0000000000..a8075e77f6 Binary files /dev/null and b/regtest/metatensor/rt-basic/scalar-per-atom.pt differ diff --git a/regtest/metatensor/rt-basic/scalar_global.reference b/regtest/metatensor/rt-basic/scalar_global.reference new file mode 100644 index 0000000000..ac566d8269 --- /dev/null +++ b/regtest/metatensor/rt-basic/scalar_global.reference @@ -0,0 +1,2 @@ +#! FIELDS time scalar_global + 0.000000 87.04 diff --git a/regtest/metatensor/rt-basic/scalar_per_atom.reference b/regtest/metatensor/rt-basic/scalar_per_atom.reference new file mode 100644 index 0000000000..f5b24df224 --- /dev/null +++ b/regtest/metatensor/rt-basic/scalar_per_atom.reference @@ -0,0 +1,2 @@ +#! 
FIELDS time scalar_per_atom.1 scalar_per_atom.2 scalar_per_atom.3 scalar_per_atom.4 scalar_per_atom.5 scalar_per_atom.6 scalar_per_atom.7 scalar_per_atom.8 scalar_per_atom.9 + 0.000000 9.73 9.34 9.93 9.06 8.87 10.28 9.32 9.93 10.58 diff --git a/regtest/metatensor/rt-basic/structure.xyz b/regtest/metatensor/rt-basic/structure.xyz new file mode 100644 index 0000000000..d1d831dae6 --- /dev/null +++ b/regtest/metatensor/rt-basic/structure.xyz @@ -0,0 +1,11 @@ +9 +4.0 4.0 4.0 +C 2.3535 5.10858 4.48679 +C 2.83322 3.38203 3.004 +C 3.66083 1.24405 2.65283 +C 3.81093 3.40544 1.76377 +C 4.22474 0.188689 3.59337 +C 4.29399 7.00701 1.25775 +C 4.52552 4.72654 1.62947 +C 4.63227 2.20046 2.00559 +C 5.4412 1.75131 0.827934 diff --git a/regtest/metatensor/rt-basic/vector-global.pt b/regtest/metatensor/rt-basic/vector-global.pt new file mode 100644 index 0000000000..15c073c8c4 Binary files /dev/null and b/regtest/metatensor/rt-basic/vector-global.pt differ diff --git a/regtest/metatensor/rt-basic/vector-per-atom.pt b/regtest/metatensor/rt-basic/vector-per-atom.pt new file mode 100644 index 0000000000..919831b43a Binary files /dev/null and b/regtest/metatensor/rt-basic/vector-per-atom.pt differ diff --git a/regtest/metatensor/rt-basic/vector_global.reference b/regtest/metatensor/rt-basic/vector_global.reference new file mode 100644 index 0000000000..b51a835f5e --- /dev/null +++ b/regtest/metatensor/rt-basic/vector_global.reference @@ -0,0 +1,2 @@ +#! FIELDS time vector_global.1 vector_global.2 + 0.000000 87.04 7575.59 diff --git a/regtest/metatensor/rt-basic/vector_per_atom.reference b/regtest/metatensor/rt-basic/vector_per_atom.reference new file mode 100644 index 0000000000..755aa2231a --- /dev/null +++ b/regtest/metatensor/rt-basic/vector_per_atom.reference @@ -0,0 +1,2 @@ +#! FIELDS time vector_per_atom.1.1 vector_per_atom.1.2 vector_per_atom.2.1 vector_per_atom.2.2 vector_per_atom.3.1 vector_per_atom.3.2 vector_per_atom.4.1 vector_per_atom.4.2 vector_per_atom.5.1 vector_per_atom.5.2 vector_per_atom.6.1 vector_per_atom.6.2 vector_per_atom.7.1 vector_per_atom.7.2 vector_per_atom.8.1 vector_per_atom.8.2 vector_per_atom.9.1 vector_per_atom.9.2 + 0.000000 9.73 3.95 9.34 3.68 9.93 4.29 9.06 5.04 8.87 3.78 10.28 5.47 9.32 4.34 9.93 4.72 10.58 4.50 diff --git a/src/.gitignore b/src/.gitignore index b0a047c56e..9dcf4a92f5 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -65,6 +65,7 @@ !/pytorch !/membranefusion !/wham +!/metatensor # And just ignore these files *.xxd diff --git a/src/metatensor/.gitignore b/src/metatensor/.gitignore new file mode 100644 index 0000000000..ded87dba97 --- /dev/null +++ b/src/metatensor/.gitignore @@ -0,0 +1,13 @@ +/* +# in this directory, only accept source, Makefile and README +!/.gitignore +!/*.c +!/*.cpp +!/*.h +!/*.sh +!/Makefile +!/README.md +!/module.type +!/COPYRIGHT +!/PEOPLE-METATENSOR +!external/ diff --git a/src/metatensor/COPYRIGHT b/src/metatensor/COPYRIGHT new file mode 100644 index 0000000000..da51e08c6d --- /dev/null +++ b/src/metatensor/COPYRIGHT @@ -0,0 +1,20 @@ +Copyright (c) 2024 The METATENSOR code team +(see the PEOPLE-METATENSOR file at the root of this folder for a list of names) + +See https://docs.metatensor.org/latest/ for more information about the +metatensor package that this module allows you to call from PLUMED. + +This file is part of METATENSOR-PLUMED module. 
+ +The METATENSOR-PLUMED module is free software: you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +The METATENSOR-PLUMED module is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the METATENSOR-PLUMED module. If not, see . diff --git a/src/metatensor/Makefile b/src/metatensor/Makefile new file mode 100644 index 0000000000..fce705eebf --- /dev/null +++ b/src/metatensor/Makefile @@ -0,0 +1,4 @@ +USE=core + +#generic makefile +include ../maketools/make.module diff --git a/src/metatensor/PEOPLE-METATENSOR b/src/metatensor/PEOPLE-METATENSOR new file mode 100644 index 0000000000..f9a903c94f --- /dev/null +++ b/src/metatensor/PEOPLE-METATENSOR @@ -0,0 +1,11 @@ +METATENSOR-PLUMED module is covered under the GNU LESSER GENERAL PUBLIC LICENSE +(see COPYING.LESSER in PLUMED2 root directory). This license applies to all the +source code in this directory, except from where explicitly indicated. + +Authors of this software are: + +- Guillaume Fraux (Github: @Luthaf) + +The following people have contributed to this software: + +- Gareth Tribello (Github: @gtribello) diff --git a/src/metatensor/README.md b/src/metatensor/README.md new file mode 100644 index 0000000000..6672f16250 --- /dev/null +++ b/src/metatensor/README.md @@ -0,0 +1,10 @@ +# Metatensor module for PLUMED + + +## Building the code + +See [the main documentation](../../user-doc/METATENSORMOD.md) for more +information on how to compile and use this module. + + + diff --git a/src/metatensor/import.sh b/src/metatensor/import.sh new file mode 100755 index 0000000000..a1a80f9600 --- /dev/null +++ b/src/metatensor/import.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +path=$1 + +if (($# != 1)) ; then +echo "usage: $0 /path/to/vesin" +echo +echo "All the commands to run:" +echo " cd /some/dir/" +echo " git clone https://github.com/luthaf/vesin" +echo " cd /your/plumed2/src/metatensor" +echo " ./import.sh /some/dir/vesin" +exit 0 +fi + +bash -c "$path/create-single-cpp.py > /dev/null" + +cp $path/include/vesin.h vesin.h +mv vesin-single-build.cpp vesin.cpp + +# Patch files to follow PLUMED linter +sed 's|#define VESIN_H|#define VESIN_H\n/*INDENT-OFF*/\n| + s|VESIN_H|__PLUMED_metatensor_vesin_h|g + s||| + s||| + s|extern "C" {|namespace PLMD {\nnamespace metatensor {\nnamespace vesin {\nextern "C" {| + s|} // extern "C"|} // extern "C"\n} // namespace vesin\n} // namespace metatensor\n} // namespace PLMD| + ' vesin.h > tmp +mv tmp vesin.h + +sed '1 s|^|/*INDENT-OFF*/\n#include "vesin.h"\n| + s||| + s||| + s|vesin::|PLMD::metatensor::vesin::| + s|namespace vesin {|namespace PLMD {\nnamespace metatensor {\nnamespace vesin {| + s|} // namespace vesin|} // namespace vesin\n} // namespace metatensor\n} // namespace PLMD| + s|using namespace PLMD::metatensor::vesin::cpu;|using namespace PLMD::metatensor::vesin;\nusing namespace PLMD::metatensor::vesin::cpu;| + ' vesin.cpp > tmp +mv tmp vesin.cpp + +cd .. 
+./header.sh metatensor
diff --git a/src/metatensor/metatensor.cpp b/src/metatensor/metatensor.cpp
new file mode 100644
index 0000000000..7052d38f08
--- /dev/null
+++ b/src/metatensor/metatensor.cpp
@@ -0,0 +1,964 @@
+/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Copyright (c) 2024 The METATENSOR code team
+(see the PEOPLE-METATENSOR file at the root of this folder for a list of names)
+
+See https://docs.metatensor.org/latest/ for more information about the
+metatensor package that this module allows you to call from PLUMED.
+
+This file is part of METATENSOR-PLUMED module.
+
+The METATENSOR-PLUMED module is free software: you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The METATENSOR-PLUMED module is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the METATENSOR-PLUMED module. If not, see <https://www.gnu.org/licenses/>.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+
+#include "core/ActionAtomistic.h"
+#include "core/ActionWithValue.h"
+#include "core/ActionRegister.h"
+#include "core/PlumedMain.h"
+
+//+PLUMEDOC METATENSORMOD_COLVAR METATENSOR
+/*
+Use arbitrary machine learning models as collective variables.
+
+Note that this action requires the metatensor-torch library. Check the
+instructions in the \ref METATENSORMOD page to enable this module.
+
+This action enables the use of fully custom machine learning models — based on
+the [metatensor atomistic models][mts_models] interface — as collective
+variables in PLUMED. Such machine learning models are typically written and
+customized using Python code, and then exported to run within PLUMED as
+[TorchScript], which is a subset of Python that can be executed by the C++ torch
+library.
+
+Metatensor offers a way to define such models and pass data from PLUMED (or any
+other simulation engine) to the model and back. For more information on how to
+define such a model, have a look at the [corresponding tutorials][mts_tutorials],
+or at the code in `regtest/metatensor/`. Each of the Python scripts in this
+directory defines a custom machine learning CV that can be used with PLUMED.
+
+\par Examples
+
+The following input shows how you can call metatensor and evaluate the model
+that is described in the file `custom_cv.pt` from PLUMED.
+
+\plumedfile
+metatensor_cv: METATENSOR ...
+    MODEL=custom_cv.pt
+
+    SPECIES1=1-26
+    SPECIES2=27-62
+    SPECIES3=63-76
+    SPECIES_TO_TYPES=6,1,8
+...
+\endplumedfile
+
+The numbered `SPECIES` labels are used to indicate the list of atoms that belong
+to each atomic species in the system. The `SPECIES_TO_TYPES` keyword then
+provides information on the atom type for each species. The first number here is
+the atomic type of the atoms that have been specified using the `SPECIES1` flag,
+the second number is the atomic type of the atoms that have been specified
+using the `SPECIES2` flag and so on.
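+
+On the model side, the `SPECIES` groups and the `SPECIES_TO_TYPES` mapping
+determine the `types` tensor of the `System` objects the model receives: with
+the input above, atoms 1-26 arrive with type 6, atoms 27-62 with type 1, and
+atoms 63-76 with type 8. As a rough sketch of how a model can use this (the
+helper below is illustrative and not part of this module):
+
+\verbatim
+import torch
+from metatensor.torch.atomistic import System
+
+def select_carbons(system: System) -> torch.Tensor:
+    # 0-based indices of all atoms that PLUMED labeled with type 6,
+    # i.e. the atoms listed under SPECIES1 in the input above
+    return torch.nonzero(system.types == 6).reshape(-1)
+\endverbatim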
+
+The `METATENSOR` action also accepts the following options:
+
+- `EXTENSIONS_DIRECTORY` should be the path to a directory containing
+  TorchScript extensions (as shared libraries) that are required to load and
+  execute the model. This matches the `collect_extensions` argument to
+  `MetatensorAtomisticModel.export` in Python.
+- `NO_CONSISTENCY_CHECK` can be used to disable internal consistency checks;
+- `SELECTED_ATOMS` can be used to signal the metatensor model that it should
+  only run its calculation for the selected subset of atoms. The model still
+  needs to know about all the atoms in the system (through the `SPECIES`
+  keyword); but this can be used to reduce the calculation cost. Note that the
+  indices of the selected atoms should start at 1 in the PLUMED input file, but
+  they will be translated to start at 0 when given to the model (i.e. in
+  Python/TorchScript, the `forward` method will receive a `selected_atoms` which
+  starts at 0)
+
+Here is another example with all the possible keywords:
+
+\plumedfile
+soap: METATENSOR ...
+    MODEL=soap.pt
+    EXTENSIONS_DIRECTORY=extensions
+    NO_CONSISTENCY_CHECK
+
+    SPECIES1=1-10
+    SPECIES2=11-20
+    SPECIES_TO_TYPES=8,13
+
+    # only run the calculation for the Aluminium (type 13) atoms, but
+    # include the Oxygen (type 8) as potential neighbors.
+    SELECTED_ATOMS=11-20
+...
+\endplumedfile
+
+\par Collective variables and metatensor models
+
+Collective variables are not yet part of the [known outputs][mts_outputs] for
+metatensor models. Until the output format is standardized, this action expects
+the following:
+
+- the output name should be `"plumed::cv"`;
+- the output should contain a single [block][mts_block];
+- the output samples should be named `["system", "atom"]` for per-atom outputs;
+  or `["system"]` for global outputs. The `"system"` index should always be 0,
+  and the `"atom"` index should be the index of the atom (between 0 and the
+  total number of atoms);
+- the output should not have any components;
+- the output can have arbitrary properties;
+- the output should not have any explicit gradients, all gradient calculations
+  are done using autograd.
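+
+As a concrete illustration of these conventions, a global output with a single
+arbitrary property can be built on the Python side as follows. This is a
+minimal sketch using the metatensor-torch API, not code taken from this module:
+
+\verbatim
+import torch
+from metatensor.torch import Labels, TensorBlock, TensorMap
+
+def global_cv(value: float) -> TensorMap:
+    # one sample (a single system), one property, no components,
+    # and no explicit gradients: derivatives are left to autograd
+    block = TensorBlock(
+        values=torch.tensor([[value]]),
+        samples=Labels("system", torch.tensor([[0]])),
+        components=[],
+        properties=Labels("cv", torch.tensor([[0]])),
+    )
+    return TensorMap(keys=Labels("_", torch.tensor([[0]])), blocks=[block])
+\endverbatim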
\n" + "Make sure to configure with `--enable-metatensor --enable-libtorch` " + "and that the corresponding libraries are found" + ); + } + + void calculate() override {} + void apply() override {} + unsigned getNumberOfDerivatives() override {return 0;} +}; + +}} // namespace PLMD::metatensor + +#else + +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#pragma GCC diagnostic ignored "-Wunused-parameter" +#pragma GCC diagnostic ignored "-Wfloat-equal" +#pragma GCC diagnostic ignored "-Wfloat-conversion" +#pragma GCC diagnostic ignored "-Wimplicit-float-conversion" +#pragma GCC diagnostic ignored "-Wimplicit-int-conversion" +#pragma GCC diagnostic ignored "-Wshorten-64-to-32" +#pragma GCC diagnostic ignored "-Wsign-conversion" +#pragma GCC diagnostic ignored "-Wold-style-cast" + +#include +#include +#include +#if TORCH_VERSION_MAJOR >= 2 +#include +#endif + +#pragma GCC diagnostic pop + +#include +#include + +#include "vesin.h" + + +namespace PLMD { +namespace metatensor { + +// We will cast Vector/Tensor to pointers to arrays and doubles, so let's make +// sure this is legal to do +static_assert(std::is_standard_layout::value); +static_assert(sizeof(PLMD::Vector) == sizeof(std::array)); +static_assert(alignof(PLMD::Vector) == alignof(std::array)); + +static_assert(std::is_standard_layout::value); +static_assert(sizeof(PLMD::Tensor) == sizeof(std::array, 3>)); +static_assert(alignof(PLMD::Tensor) == alignof(std::array, 3>)); + +class MetatensorPlumedAction: public ActionAtomistic, public ActionWithValue { +public: + static void registerKeywords(Keywords& keys); + explicit MetatensorPlumedAction(const ActionOptions&); + + void calculate() override; + void apply() override; + unsigned getNumberOfDerivatives() override; + +private: + // fill this->system_ according to the current PLUMED data + void createSystem(); + // compute a neighbor list following metatensor format, using data from PLUMED + metatensor_torch::TorchTensorBlock computeNeighbors( + metatensor_torch::NeighborListOptions request, + const std::vector& positions, + const PLMD::Tensor& cell + ); + + // execute the model for the given system + metatensor_torch::TorchTensorBlock executeModel(metatensor_torch::System system); + + torch::jit::Module model_; + + // neighbor lists requests made by the model + metatensor_torch::ModelCapabilities capabilities_; + std::vector nl_requests_; + + // dtype/device to use to execute the model + torch::ScalarType dtype_; + torch::Device device_; + + torch::Tensor atomic_types_; + // store the strain to be able to compute the virial with autograd + torch::Tensor strain_; + + metatensor_torch::System system_; + metatensor_torch::ModelEvaluationOptions evaluations_options_; + bool check_consistency_; + + metatensor_torch::TorchTensorMap output_; + // shape of the output of this model + unsigned n_samples_; + unsigned n_properties_; +}; + + +MetatensorPlumedAction::MetatensorPlumedAction(const ActionOptions& options): + Action(options), + ActionAtomistic(options), + ActionWithValue(options), + device_(torch::kCPU) +{ + if (metatensor_torch::version().find("0.5.") != 0) { + this->error( + "this code requires version 0.5.x of metatensor-torch, got version " + + metatensor_torch::version() + ); + } + + // first, load the model + std::string extensions_directory_str; + this->parse("EXTENSIONS_DIRECTORY", extensions_directory_str); + + torch::optional extensions_directory = torch::nullopt; + if (!extensions_directory_str.empty()) { + extensions_directory = 
std::move(extensions_directory_str); + } + + std::string model_path; + this->parse("MODEL", model_path); + + try { + this->model_ = metatensor_torch::load_atomistic_model(model_path, extensions_directory); + } catch (const std::exception& e) { + this->error("failed to load model at '" + model_path + "': " + e.what()); + } + + // extract information from the model + auto metadata = this->model_.run_method("metadata").toCustomClass(); + this->capabilities_ = this->model_.run_method("capabilities").toCustomClass(); + auto requests_ivalue = this->model_.run_method("requested_neighbor_lists"); + for (auto request_ivalue: requests_ivalue.toList()) { + auto request = request_ivalue.get().toCustomClass(); + this->nl_requests_.push_back(request); + } + + log.printf("\n%s\n", metadata->print().c_str()); + // add the model references to PLUMED citation handling mechanism + for (const auto& it: metadata->references) { + for (const auto& ref: it.value()) { + this->cite(ref); + } + } + + // parse the atomic types from the input file + std::vector atomic_types; + std::vector species_to_types; + this->parseVector("SPECIES_TO_TYPES", species_to_types); + bool has_custom_types = !species_to_types.empty(); + + std::vector all_atoms; + this->parseAtomList("SPECIES", all_atoms); + + size_t n_species = 0; + if (all_atoms.empty()) { + std::vector t; + int i = 0; + while (true) { + i += 1; + this->parseAtomList("SPECIES", i, t); + if (t.empty()) { + break; + } + + int32_t type = i; + if (has_custom_types) { + if (species_to_types.size() < static_cast(i)) { + this->error( + "SPECIES_TO_TYPES is too small, it should have one entry " + "for each species (we have at least " + std::to_string(i) + + " species and " + std::to_string(species_to_types.size()) + + "entries in SPECIES_TO_TYPES)" + ); + } + + type = species_to_types[static_cast(i - 1)]; + } + + log.printf(" atoms with type %d are: ", type); + for(unsigned j=0; jwarning( + "SPECIES_TO_TYPES contains more entries (" + + std::to_string(species_to_types.size()) + + ") than there where species (" + std::to_string(n_species) + ")" + ); + } + + this->atomic_types_ = torch::tensor(std::move(atomic_types)); + this->requestAtoms(all_atoms); + + bool no_consistency_check = false; + this->parseFlag("NO_CONSISTENCY_CHECK", no_consistency_check); + this->check_consistency_ = !no_consistency_check; + if (this->check_consistency_) { + log.printf(" checking for internal consistency of the model\n"); + } + + // create evaluation options for the model. These won't change during the + // simulation, so we initialize them once here. + evaluations_options_ = torch::make_intrusive(); + evaluations_options_->set_length_unit(getUnits().getLengthString()); + + if (!this->capabilities_->outputs().contains("plumed::cv")) { + auto existing_outputs = std::vector(); + for (const auto& it: this->capabilities_->outputs()) { + existing_outputs.push_back(it.key()); + } + + this->error( + "expected 'plumed::cv' in the capabilities of the model, could not find it. " + "the following outputs exist: " + torch::str(existing_outputs) + ); + } + + auto output = torch::make_intrusive(); + // this output has no quantity or unit to set + + output->per_atom = this->capabilities_->outputs().at("plumed::cv")->per_atom; + // we are using torch autograd system to compute gradients, + // so we don't need any explicit gradients. 
+ output->explicit_gradients = {}; + evaluations_options_->outputs.insert("plumed::cv", output); + + // Determine which device we should use based on user input, what the model + // supports and what's available + auto available_devices = std::vector(); + for (const auto& device: this->capabilities_->supported_devices) { + if (device == "cpu") { + available_devices.push_back(torch::kCPU); + } else if (device == "cuda") { + if (torch::cuda::is_available()) { + available_devices.push_back(torch::Device("cuda")); + } + } else if (device == "mps") { + #if TORCH_VERSION_MAJOR >= 2 + if (torch::mps::is_available()) { + available_devices.push_back(torch::Device("mps")); + } + #endif + } else { + this->warning( + "the model declared support for unknown device '" + device + + "', it will be ignored" + ); + } + } + + if (available_devices.empty()) { + this->error( + "failed to find a valid device for the model at '" + model_path + "': " + "the model supports " + torch::str(this->capabilities_->supported_devices) + + ", none of these where available" + ); + } + + std::string requested_device; + this->parse("DEVICE", requested_device); + if (requested_device.empty()) { + // no user request, pick the device the model prefers + this->device_ = available_devices[0]; + } else { + bool found_requested_device = false; + for (const auto& device: available_devices) { + if (device.is_cpu() && requested_device == "cpu") { + this->device_ = device; + found_requested_device = true; + break; + } else if (device.is_cuda() && requested_device == "cuda") { + this->device_ = device; + found_requested_device = true; + break; + } else if (device.is_mps() && requested_device == "mps") { + this->device_ = device; + found_requested_device = true; + break; + } + } + + if (!found_requested_device) { + this->error( + "failed to find requested device (" + requested_device + "): it is either " + "not supported by this model or not available on this machine" + ); + } + } + + this->model_.to(this->device_); + this->atomic_types_ = this->atomic_types_.to(this->device_); + + log.printf( + " running model on %s device with %s data\n", + this->device_.str().c_str(), + this->capabilities_->dtype().c_str() + ); + + if (this->capabilities_->dtype() == "float64") { + this->dtype_ = torch::kFloat64; + } else if (this->capabilities_->dtype() == "float32") { + this->dtype_ = torch::kFloat32; + } else { + this->error( + "the model requested an unsupported dtype '" + this->capabilities_->dtype() + "'" + ); + } + + auto tensor_options = torch::TensorOptions().dtype(this->dtype_).device(this->device_); + this->strain_ = torch::eye(3, tensor_options.requires_grad(true)); + + // determine how many properties there will be in the output by running the + // model once on a dummy system + auto dummy_system = torch::make_intrusive( + /*types = */ torch::zeros({0}, tensor_options.dtype(torch::kInt32)), + /*positions = */ torch::zeros({0, 3}, tensor_options), + /*cell = */ torch::zeros({3, 3}, tensor_options) + ); + + log.printf(" the following neighbor lists have been requested:\n"); + auto length_unit = this->getUnits().getLengthString(); + auto model_length_unit = this->capabilities_->length_unit(); + for (auto request: this->nl_requests_) { + log.printf(" - %s list, %g %s cutoff (requested %g %s)\n", + request->full_list() ? 
"full" : "half", + request->engine_cutoff(length_unit), + length_unit.c_str(), + request->cutoff(), + model_length_unit.c_str() + ); + + auto neighbors = this->computeNeighbors(request, {PLMD::Vector(0, 0, 0)}, PLMD::Tensor(0, 0, 0, 0, 0, 0, 0, 0, 0)); + metatensor_torch::register_autograd_neighbors(dummy_system, neighbors, this->check_consistency_); + dummy_system->add_neighbor_list(request, neighbors); + } + + this->n_properties_ = static_cast( + this->executeModel(dummy_system)->properties()->count() + ); + + // parse and handle atom sub-selection. This is done AFTER determining the + // output size, since the selection might not be valid for the dummy system + std::vector selected_atoms; + this->parseVector("SELECTED_ATOMS", selected_atoms); + if (!selected_atoms.empty()) { + auto selection_value = torch::zeros( + {static_cast(selected_atoms.size()), 2}, + torch::TensorOptions().dtype(torch::kInt32).device(this->device_) + ); + + for (unsigned i=0; i(this->atomic_types_.size(0)); + if (selected_atoms[i] <= 0 || selected_atoms[i] > n_atoms) { + this->error( + "Values in metatensor's SELECTED_ATOMS should be between 1 " + "and the number of atoms (" + std::to_string(n_atoms) + "), " + "got " + std::to_string(selected_atoms[i])); + } + // PLUMED input uses 1-based indexes, but metatensor wants 0-based + selection_value[i][1] = selected_atoms[i] - 1; + } + + evaluations_options_->set_selected_atoms( + torch::make_intrusive( + std::vector{"system", "atom"}, selection_value + ) + ); + } + + // Now that we now both n_samples and n_properties, we can setup the + // PLUMED-side storage for the computed CV + if (output->per_atom) { + if (selected_atoms.empty()) { + this->n_samples_ = static_cast(this->atomic_types_.size(0)); + } else { + this->n_samples_ = static_cast(selected_atoms.size()); + } + } else { + this->n_samples_ = 1; + } + + if (n_samples_ == 1 && n_properties_ == 1) { + log.printf(" the output of this model is a scalar\n"); + + this->addValue(); + } else if (n_samples_ == 1) { + log.printf(" the output of this model is 1x%d vector\n", n_properties_); + + this->addValue({this->n_properties_}); + this->getPntrToComponent(0)->buildDataStore(); + } else if (n_properties_ == 1) { + log.printf(" the output of this model is %dx1 vector\n", n_samples_); + + this->addValue({this->n_samples_}); + this->getPntrToComponent(0)->buildDataStore(); + } else { + log.printf(" the output of this model is a %dx%d matrix\n", n_samples_, n_properties_); + + this->addValue({this->n_samples_, this->n_properties_}); + this->getPntrToComponent(0)->buildDataStore(); + this->getPntrToComponent(0)->reshapeMatrixStore(n_properties_); + } + + this->setNotPeriodic(); +} + +unsigned MetatensorPlumedAction::getNumberOfDerivatives() { + // gradients w.r.t. positions (3 x N values) + gradients w.r.t. strain (9 values) + return 3 * this->getNumberOfAtoms() + 9; +} + + +void MetatensorPlumedAction::createSystem() { + if (this->getTotAtoms() != static_cast(this->atomic_types_.size(0))) { + std::ostringstream oss; + oss << "METATENSOR action needs to know about all atoms in the system. 
"; + oss << "There are " << this->getTotAtoms() << " atoms overall, "; + oss << "but we only have atomic types for " << this->atomic_types_.size(0) << " of them."; + plumed_merror(oss.str()); + } + + // this->getTotAtoms() + + const auto& cell = this->getPbc().getBox(); + + auto cpu_f64_tensor = torch::TensorOptions().dtype(torch::kFloat64).device(torch::kCPU); + auto torch_cell = torch::zeros({3, 3}, cpu_f64_tensor); + + // TODO: check if cell is stored in row or column major order + // TODO: check if cell is zero for non-periodic systems + torch_cell[0][0] = cell(0, 0); + torch_cell[0][1] = cell(0, 1); + torch_cell[0][2] = cell(0, 2); + + torch_cell[1][0] = cell(1, 0); + torch_cell[1][1] = cell(1, 1); + torch_cell[1][2] = cell(1, 2); + + torch_cell[2][0] = cell(2, 0); + torch_cell[2][1] = cell(2, 1); + torch_cell[2][2] = cell(2, 2); + + const auto& positions = this->getPositions(); + + auto torch_positions = torch::from_blob( + const_cast(positions.data()), + {static_cast(positions.size()), 3}, + cpu_f64_tensor + ); + + torch_positions = torch_positions.to(this->dtype_).to(this->device_); + torch_cell = torch_cell.to(this->dtype_).to(this->device_); + + // setup torch's automatic gradient tracking + if (!this->doNotCalculateDerivatives()) { + torch_positions.requires_grad_(true); + + // pretend to scale positions/cell by the strain so that it enters the + // computational graph. + torch_positions = torch_positions.matmul(this->strain_); + torch_positions.retain_grad(); + + torch_cell = torch_cell.matmul(this->strain_); + } + + this->system_ = torch::make_intrusive( + this->atomic_types_, + torch_positions, + torch_cell + ); + + // compute the neighbors list requested by the model, and register them with + // the system + for (auto request: this->nl_requests_) { + auto neighbors = this->computeNeighbors(request, positions, cell); + metatensor_torch::register_autograd_neighbors(this->system_, neighbors, this->check_consistency_); + this->system_->add_neighbor_list(request, neighbors); + } +} + + +metatensor_torch::TorchTensorBlock MetatensorPlumedAction::computeNeighbors( + metatensor_torch::NeighborListOptions request, + const std::vector& positions, + const PLMD::Tensor& cell +) { + auto labels_options = torch::TensorOptions().dtype(torch::kInt32).device(this->device_); + auto neighbor_component = torch::make_intrusive( + "xyz", + torch::tensor({0, 1, 2}, labels_options).reshape({3, 1}) + ); + auto neighbor_properties = torch::make_intrusive( + "distance", torch::zeros({1, 1}, labels_options) + ); + + auto cutoff = request->engine_cutoff(this->getUnits().getLengthString()); + + auto non_periodic = ( + cell(0, 0) == 0.0 && cell(0, 1) == 0.0 && cell(0, 2) == 0.0 && + cell(1, 0) == 0.0 && cell(1, 1) == 0.0 && cell(1, 2) == 0.0 && + cell(2, 0) == 0.0 && cell(2, 2) == 0.0 && cell(2, 2) == 0.0 + ); + + // use https://github.com/Luthaf/vesin to compute the requested neighbor + // lists since we can not get these from PLUMED + vesin::VesinOptions options; + options.cutoff = cutoff; + options.full = request->full_list(); + options.return_shifts = true; + options.return_distances = false; + options.return_vectors = true; + + vesin::VesinNeighborList* vesin_neighbor_list = new vesin::VesinNeighborList(); + memset(vesin_neighbor_list, 0, sizeof(vesin::VesinNeighborList)); + + const char* error_message = NULL; + int status = vesin_neighbors( + reinterpret_cast(positions.data()), + positions.size(), + reinterpret_cast(&cell(0, 0)), + !non_periodic, + vesin::VesinCPU, + options, + vesin_neighbor_list, 
+ &error_message + ); + + if (status != EXIT_SUCCESS) { + plumed_merror( + "failed to compute neighbor list (cutoff=" + std::to_string(cutoff) + + ", full=" + (request->full_list() ? "true" : "false") + "): " + error_message + ); + } + + // transform from vesin to metatensor format + auto n_pairs = static_cast(vesin_neighbor_list->length); + + auto pair_vectors = torch::from_blob( + vesin_neighbor_list->vectors, + {n_pairs, 3, 1}, + /*deleter*/ [=](void*) { + vesin_free(vesin_neighbor_list); + delete vesin_neighbor_list; + }, + torch::TensorOptions().dtype(torch::kFloat64).device(torch::kCPU) + ); + + auto pair_samples_values = torch::zeros({n_pairs, 5}, labels_options.device(torch::kCPU)); + for (unsigned i=0; i(vesin_neighbor_list->pairs[i][0]); + pair_samples_values[i][1] = static_cast(vesin_neighbor_list->pairs[i][1]); + pair_samples_values[i][2] = vesin_neighbor_list->shifts[i][0]; + pair_samples_values[i][3] = vesin_neighbor_list->shifts[i][1]; + pair_samples_values[i][4] = vesin_neighbor_list->shifts[i][2]; + } + + auto neighbor_samples = torch::make_intrusive( + std::vector{"first_atom", "second_atom", "cell_shift_a", "cell_shift_b", "cell_shift_c"}, + pair_samples_values.to(this->device_) + ); + + auto neighbors = torch::make_intrusive( + pair_vectors.to(this->dtype_).to(this->device_), + neighbor_samples, + std::vector{neighbor_component}, + neighbor_properties + ); + + return neighbors; +} + +metatensor_torch::TorchTensorBlock MetatensorPlumedAction::executeModel(metatensor_torch::System system) { + try { + auto ivalue_output = this->model_.forward({ + std::vector{system}, + evaluations_options_, + this->check_consistency_, + }); + + auto dict_output = ivalue_output.toGenericDict(); + auto cv = dict_output.at("plumed::cv"); + this->output_ = cv.toCustomClass(); + } catch (const std::exception& e) { + plumed_merror("failed to evaluate the model: " + std::string(e.what())); + } + + plumed_massert(this->output_->keys()->count() == 1, "output should have a single block"); + auto block = metatensor_torch::TensorMapHolder::block_by_id(this->output_, 0); + plumed_massert(block->components().empty(), "components are not yet supported in the output"); + + return block; +} + + +void MetatensorPlumedAction::calculate() { + this->createSystem(); + + auto block = this->executeModel(this->system_); + auto torch_values = block->values().to(torch::kCPU).to(torch::kFloat64); + + if (static_cast(torch_values.size(0)) != this->n_samples_) { + plumed_merror( + "expected the model to return a TensorBlock with " + + std::to_string(this->n_samples_) + " samples, got " + + std::to_string(torch_values.size(0)) + " instead" + ); + } else if (static_cast(torch_values.size(1)) != this->n_properties_) { + plumed_merror( + "expected the model to return a TensorBlock with " + + std::to_string(this->n_properties_) + " properties, got " + + std::to_string(torch_values.size(1)) + " instead" + ); + } + + Value* value = this->getPntrToComponent(0); + // reshape the plumed `Value` to hold the data returned by the model + if (n_samples_ == 1) { + if (n_properties_ == 1) { + value->set(torch_values.item()); + } else { + // we have multiple CV describing a single thing (atom or full system) + for (unsigned i=0; iset(i, torch_values[0][i].item()); + } + } + } else { + auto samples = block->samples(); + plumed_assert((samples->names() == std::vector{"system", "atom"})); + + auto samples_values = samples->values().to(torch::kCPU); + auto selected_atoms = this->evaluations_options_->get_selected_atoms(); + + // handle the 
possibility that samples are returned in + // a non-sorted order. + auto get_output_location = [&](unsigned i) { + if (selected_atoms.has_value()) { + // If the users picked some selected atoms, then we store the + // output in the same order as the selection was given + auto sample = samples_values.index({static_cast(i), torch::indexing::Slice()}); + auto position = selected_atoms.value()->position(sample); + plumed_assert(position.has_value()); + return static_cast(position.value()); + } else { + return static_cast(samples_values[i][1].item()); + } + }; + + if (n_properties_ == 1) { + // we have a single CV describing multiple things (i.e. atoms) + for (unsigned i=0; iset(output_i, torch_values[i][0].item()); + } + } else { + // the CV is a matrix + for (unsigned i=0; iset(output_i * n_properties_ + j, torch_values[i][j].item()); + } + } + } + } +} + + +void MetatensorPlumedAction::apply() { + const auto* value = this->getPntrToComponent(0); + if (!value->forcesWereAdded()) { + return; + } + + auto block = metatensor_torch::TensorMapHolder::block_by_id(this->output_, 0); + auto torch_values = block->values().to(torch::kCPU).to(torch::kFloat64); + + auto output_grad = torch::zeros_like(torch_values); + if (n_samples_ == 1) { + if (n_properties_ == 1) { + output_grad[0][0] = value->getForce(); + } else { + for (unsigned i=0; igetForce(i); + } + } + } else { + auto samples = block->samples(); + plumed_assert((samples->names() == std::vector{"system", "atom"})); + + auto samples_values = samples->values().to(torch::kCPU); + auto selected_atoms = this->evaluations_options_->get_selected_atoms(); + + // see above for an explanation of why we use this function + auto get_output_location = [&](unsigned i) { + if (selected_atoms.has_value()) { + auto sample = samples_values.index({static_cast(i), torch::indexing::Slice()}); + auto position = selected_atoms.value()->position(sample); + plumed_assert(position.has_value()); + return static_cast(position.value()); + } else { + return static_cast(samples_values[i][1].item()); + } + }; + + if (n_properties_ == 1) { + for (unsigned i=0; igetForce(output_i); + } + } else { + for (unsigned i=0; igetForce(output_i * n_properties_ + j); + } + } + } + } + + this->system_->positions().mutable_grad() = torch::Tensor(); + this->strain_.mutable_grad() = torch::Tensor(); + + torch_values.backward(output_grad); + auto positions_grad = this->system_->positions().grad(); + auto strain_grad = this->strain_.grad(); + + positions_grad = positions_grad.to(torch::kCPU).to(torch::kFloat64); + strain_grad = strain_grad.to(torch::kCPU).to(torch::kFloat64); + + plumed_assert(positions_grad.sizes().size() == 2); + plumed_assert(positions_grad.is_contiguous()); + + plumed_assert(strain_grad.sizes().size() == 2); + plumed_assert(strain_grad.is_contiguous()); + + auto derivatives = std::vector( + positions_grad.data_ptr(), + positions_grad.data_ptr() + 3 * this->system_->size() + ); + + // add virials to the derivatives + derivatives.push_back(-strain_grad[0][0].item()); + derivatives.push_back(-strain_grad[0][1].item()); + derivatives.push_back(-strain_grad[0][2].item()); + + derivatives.push_back(-strain_grad[1][0].item()); + derivatives.push_back(-strain_grad[1][1].item()); + derivatives.push_back(-strain_grad[1][2].item()); + + derivatives.push_back(-strain_grad[2][0].item()); + derivatives.push_back(-strain_grad[2][1].item()); + derivatives.push_back(-strain_grad[2][2].item()); + + unsigned index = 0; + this->setForcesOnAtoms(derivatives, index); +} + +} // namespace 
metatensor +} // namespace PLMD + + +#endif + + +namespace PLMD { +namespace metatensor { + +// use the same implementation for both the actual action and the dummy one +// (when libtorch and libmetatensor could not be found). +void MetatensorPlumedAction::registerKeywords(Keywords& keys) { + Action::registerKeywords(keys); + ActionAtomistic::registerKeywords(keys); + ActionWithValue::registerKeywords(keys); + + keys.add("compulsory", "MODEL", "path to the exported metatensor model"); + keys.add("optional", "EXTENSIONS_DIRECTORY", "path to the directory containing TorchScript extensions to load"); + keys.add("optional", "DEVICE", "Torch device to use for the calculation"); + + // TODO: change the default? + keys.addFlag("NO_CONSISTENCY_CHECK", false, "Should we disable internal consistency of the model"); + + keys.add("numbered", "SPECIES", "the atoms in each PLUMED species"); + keys.reset_style("SPECIES", "atoms"); + + keys.add("optional", "SELECTED_ATOMS", "subset of atoms that should be used for the calculation"); + keys.reset_style("SELECTED_ATOMS", "atoms"); + + keys.add("optional", "SPECIES_TO_TYPES", "mapping from PLUMED SPECIES to metatensor's atomic types"); + + keys.addOutputComponent("outputs", "default", "collective variable created by the model"); +} + +PLUMED_REGISTER_ACTION(MetatensorPlumedAction, "METATENSOR") + +} // namespace metatensor +} // namespace PLMD diff --git a/src/metatensor/module.type b/src/metatensor/module.type new file mode 100644 index 0000000000..de83273033 --- /dev/null +++ b/src/metatensor/module.type @@ -0,0 +1 @@ +default-off diff --git a/src/metatensor/patch-torch.sh b/src/metatensor/patch-torch.sh new file mode 100755 index 0000000000..8ec7cafac0 --- /dev/null +++ b/src/metatensor/patch-torch.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash + +# This pulls in the fix from https://github.com/pytorch/pytorch/pull/119945 +# until it is properly released + +set -eu + +TORCH_PREFIX=$1 + +if [ -f "$TORCH_PREFIX/share/cmake/Caffe2/public/mkl.cmake" ]; then + MKL_CMAKE="$TORCH_PREFIX/share/cmake/Caffe2/public/mkl.cmake" +elif [ -f "$TORCH_PREFIX/Caffe2/public/mkl.cmake" ]; then + MKL_CMAKE="$TORCH_PREFIX/Caffe2/public/mkl.cmake" +else + echo "Failed to find mkl.cmake in '$TORCH_PREFIX'" + exit 1 +fi + +cat > "$MKL_CMAKE" << EOF + +find_package(MKL QUIET) + +if(TARGET caffe2::mkl) + return() +endif() + +add_library(caffe2::mkl INTERFACE IMPORTED) +target_include_directories(caffe2::mkl INTERFACE \${MKL_INCLUDE_DIR}) +target_link_libraries(caffe2::mkl INTERFACE \${MKL_LIBRARIES}) +foreach(MKL_LIB IN LISTS MKL_LIBRARIES) + if(EXISTS "\${MKL_LIB}") + get_filename_component(MKL_LINK_DIR "\${MKL_LIB}" DIRECTORY) + if(IS_DIRECTORY "\${MKL_LINK_DIR}") + target_link_directories(caffe2::mkl INTERFACE "\${MKL_LINK_DIR}") + endif() + endif() +endforeach() + +# TODO: This is a hack, it will not pick up architecture dependent +# MKL libraries correctly; see https://github.com/pytorch/pytorch/issues/73008 +set_property( + TARGET caffe2::mkl PROPERTY INTERFACE_LINK_DIRECTORIES + \${MKL_ROOT}/lib \${MKL_ROOT}/lib/intel64 \${MKL_ROOT}/lib/intel64_win \${MKL_ROOT}/lib/win-x64) + +EOF diff --git a/src/metatensor/vesin.cpp b/src/metatensor/vesin.cpp new file mode 100644 index 0000000000..a03f950e77 --- /dev/null +++ b/src/metatensor/vesin.cpp @@ -0,0 +1,876 @@ +/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Copyright (c) 2024 The METATENSOR code team +(see the PEOPLE-METATENSOR file at the root of this folder for a list of names) + +See 
+metatensor package that this module allows you to call from PLUMED.
+
+This file is part of the METATENSOR-PLUMED module.
+
+The METATENSOR-PLUMED module is free software: you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The METATENSOR-PLUMED module is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the METATENSOR-PLUMED module. If not, see <https://www.gnu.org/licenses/>.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+/*INDENT-OFF*/
+#include "vesin.h"
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+
+#include <algorithm>
+#include <limits>
+#include <new>
+#include <tuple>
+
+#ifndef VESIN_CPU_CELL_LIST_HPP
+#define VESIN_CPU_CELL_LIST_HPP
+
+#include <vector>
+
+#include "vesin.h"
+
+#ifndef VESIN_TYPES_HPP
+#define VESIN_TYPES_HPP
+
+#ifndef VESIN_MATH_HPP
+#define VESIN_MATH_HPP
+
+#include <array>
+#include <cmath>
+#include <stdexcept>
+
+namespace PLMD {
+namespace metatensor {
+namespace vesin {
+struct Vector;
+
+Vector operator*(Vector vector, double scalar);
+
+struct Vector: public std::array<double, 3> {
+    double dot(Vector other) const {
+        return (*this)[0] * other[0] + (*this)[1] * other[1] + (*this)[2] * other[2];
+    }
+
+    double norm() const {
+        return std::sqrt(this->dot(*this));
+    }
+
+    Vector normalize() const {
+        return *this * (1.0 / this->norm());
+    }
+
+    Vector cross(Vector other) const {
+        return Vector{
+            (*this)[1] * other[2] - (*this)[2] * other[1],
+            (*this)[2] * other[0] - (*this)[0] * other[2],
+            (*this)[0] * other[1] - (*this)[1] * other[0],
+        };
+    }
+};
+
+inline Vector operator+(Vector u, Vector v) {
+    return Vector{
+        u[0] + v[0],
+        u[1] + v[1],
+        u[2] + v[2],
+    };
+}
+
+inline Vector operator-(Vector u, Vector v) {
+    return Vector{
+        u[0] - v[0],
+        u[1] - v[1],
+        u[2] - v[2],
+    };
+}
+
+inline Vector operator*(double scalar, Vector vector) {
+    return Vector{
+        scalar * vector[0],
+        scalar * vector[1],
+        scalar * vector[2],
+    };
+}
+
+inline Vector operator*(Vector vector, double scalar) {
+    return Vector{
+        scalar * vector[0],
+        scalar * vector[1],
+        scalar * vector[2],
+    };
+}
+
+
+struct Matrix: public std::array<std::array<double, 3>, 3> {
+    double determinant() const {
+        return (*this)[0][0] * ((*this)[1][1] * (*this)[2][2] - (*this)[2][1] * (*this)[1][2])
+             - (*this)[0][1] * ((*this)[1][0] * (*this)[2][2] - (*this)[1][2] * (*this)[2][0])
+             + (*this)[0][2] * ((*this)[1][0] * (*this)[2][1] - (*this)[1][1] * (*this)[2][0]);
+    }
+
+    Matrix inverse() const {
+        auto det = this->determinant();
+
+        if (std::abs(det) < 1e-30) {
+            throw std::runtime_error("this matrix is not invertible");
+        }
+
+        auto inverse = Matrix();
+        inverse[0][0] = ((*this)[1][1] * (*this)[2][2] - (*this)[2][1] * (*this)[1][2]) / det;
+        inverse[0][1] = ((*this)[0][2] * (*this)[2][1] - (*this)[0][1] * (*this)[2][2]) / det;
+        inverse[0][2] = ((*this)[0][1] * (*this)[1][2] - (*this)[0][2] * (*this)[1][1]) / det;
+        inverse[1][0] = ((*this)[1][2] * (*this)[2][0] - (*this)[1][0] * (*this)[2][2]) / det;
+        inverse[1][1] = ((*this)[0][0] * (*this)[2][2] - (*this)[0][2] * (*this)[2][0]) / det;
+        inverse[1][2] = ((*this)[1][0] * (*this)[0][2] - (*this)[0][0] * (*this)[1][2]) / det;
+        inverse[2][0] = ((*this)[1][0] * (*this)[2][1] - (*this)[2][0] * (*this)[1][1]) / det;
+        inverse[2][1] = ((*this)[2][0] * (*this)[0][1] - (*this)[0][0] * (*this)[2][1]) / det;
+        inverse[2][2] = ((*this)[0][0] * (*this)[1][1] - (*this)[1][0] * (*this)[0][1]) / det;
+        return inverse;
+    }
+};
+
+
+inline Vector operator*(Matrix matrix, Vector vector) {
+    return Vector{
+        matrix[0][0] * vector[0] + matrix[0][1] * vector[1] + matrix[0][2] * vector[2],
+        matrix[1][0] * vector[0] + matrix[1][1] * vector[1] + matrix[1][2] * vector[2],
+        matrix[2][0] * vector[0] + matrix[2][1] * vector[1] + matrix[2][2] * vector[2],
+    };
+}
+
+inline Vector operator*(Vector vector, Matrix matrix) {
+    return Vector{
+        vector[0] * matrix[0][0] + vector[1] * matrix[1][0] + vector[2] * matrix[2][0],
+        vector[0] * matrix[0][1] + vector[1] * matrix[1][1] + vector[2] * matrix[2][1],
+        vector[0] * matrix[0][2] + vector[1] * matrix[1][2] + vector[2] * matrix[2][2],
+    };
+}
+
+} // namespace vesin
+} // namespace metatensor
+} // namespace PLMD
+
+#endif
+
+namespace PLMD {
+namespace metatensor {
+namespace vesin {
+
+class BoundingBox {
+public:
+    BoundingBox(Matrix matrix, bool periodic): matrix_(matrix), periodic_(periodic) {
+        if (periodic) {
+            this->inverse_ = matrix_.inverse();
+        } else {
+            this->matrix_ = Matrix{{{
+                {{1, 0, 0}},
+                {{0, 1, 0}},
+                {{0, 0, 1}}
+            }}};
+            this->inverse_ = matrix_;
+        }
+    }
+
+    const Matrix& matrix() const {
+        return this->matrix_;
+    }
+
+    bool periodic() const {
+        return this->periodic_;
+    }
+
+    /// Convert a vector from cartesian coordinates to fractional coordinates
+    Vector cartesian_to_fractional(Vector cartesian) const {
+        return cartesian * inverse_;
+    }
+
+    /// Convert a vector from fractional coordinates to cartesian coordinates
+    Vector fractional_to_cartesian(Vector fractional) const {
+        return fractional * matrix_;
+    }
+
+    /// Get the three distances between faces of the bounding box
+    Vector distances_between_faces() const {
+        auto a = Vector{matrix_[0]};
+        auto b = Vector{matrix_[1]};
+        auto c = Vector{matrix_[2]};
+
+        // Plane normal vectors
+        auto na = b.cross(c).normalize();
+        auto nb = c.cross(a).normalize();
+        auto nc = a.cross(b).normalize();
+
+        return Vector{
+            std::abs(na.dot(a)),
+            std::abs(nb.dot(b)),
+            std::abs(nc.dot(c)),
+        };
+    }
+
+private:
+    Matrix matrix_;
+    Matrix inverse_;
+    bool periodic_;
+};
+
+
+/// A cell shift represents the displacement along cell axis between the actual
+/// position of an atom and a periodic image of this atom.
+///
+/// The cell shift can be used to reconstruct the vector between two points,
+/// wrapped inside the unit cell.
+struct CellShift: public std::array<int32_t, 3> {
+    /// Compute the shift vector in cartesian coordinates, using the given cell
+    /// matrix (stored in row major order).
+    Vector cartesian(Matrix cell) const {
+        auto vector = Vector{
+            static_cast<double>((*this)[0]),
+            static_cast<double>((*this)[1]),
+            static_cast<double>((*this)[2]),
+        };
+        return vector * cell;
+    }
+};
+
+inline CellShift operator+(CellShift a, CellShift b) {
+    return CellShift{
+        a[0] + b[0],
+        a[1] + b[1],
+        a[2] + b[2],
+    };
+}
+
+inline CellShift operator-(CellShift a, CellShift b) {
+    return CellShift{
+        a[0] - b[0],
+        a[1] - b[1],
+        a[2] - b[2],
+    };
+}
+
+
+} // namespace vesin
+} // namespace metatensor
+} // namespace PLMD
+
+#endif
+
+namespace PLMD {
+namespace metatensor {
+namespace vesin { namespace cpu {
+
+void free_neighbors(VesinNeighborList& neighbors);
+
+void neighbors(
+    const Vector* points,
+    size_t n_points,
+    BoundingBox cell,
+    VesinOptions options,
+    VesinNeighborList& neighbors
+);
+
+
+/// The cell list is used to sort atoms inside bins/cells.
+///
+/// The list of potential pairs is then constructed by looking through all
+/// neighboring cells (the number of cells to search depends on the cutoff and
+/// the size of the cells) for each atom to create pair candidates.
+class CellList {
+public:
+    /// Create a new `CellList` for the given bounding box and cutoff,
+    /// determining all required parameters.
+    CellList(BoundingBox box, double cutoff);
+
+    /// Add a single point to the cell list at the given `position`. The point
+    /// is uniquely identified by its `index`.
+    void add_point(size_t index, Vector position);
+
+    /// Iterate over all possible pairs, calling the given callback every time
+    template <typename Function>
+    void foreach_pair(Function callback);
+
+private:
+    /// How many cells do we need to look at when searching neighbors to include
+    /// all neighbors below cutoff
+    std::array<int32_t, 3> n_search_;
+
+    /// the cells themselves are a list of points & corresponding
+    /// shift to place the point inside the cell
+    struct Point {
+        size_t index;
+        CellShift shift;
+    };
+    struct Cell: public std::vector<Point> {};
+
+    // raw data for the cells
+    std::vector<Cell> cells_;
+    // shape of the cell array
+    std::array<size_t, 3> cells_shape_;
+
+    BoundingBox box_;
+
+    Cell& get_cell(std::array<int32_t, 3> index);
+};
+
+/// Wrapper around `VesinNeighborList` that behaves like a std::vector,
+/// automatically growing memory allocations.
+class GrowableNeighborList {
+public:
+    VesinNeighborList& neighbors;
+    size_t capacity;
+    VesinOptions options;
+
+    size_t length() const {
+        return neighbors.length;
+    }
+
+    void increment_length() {
+        neighbors.length += 1;
+    }
+
+    void set_pair(size_t index, size_t first, size_t second);
+    void set_shift(size_t index, PLMD::metatensor::vesin::CellShift shift);
+    void set_distance(size_t index, double distance);
+    void set_vector(size_t index, PLMD::metatensor::vesin::Vector vector);
+
+    // reset length to 0, and allocate/deallocate members of
+    // `neighbors` according to `options`
+    void reset();
+
+    // allocate more memory & update capacity
+    void grow();
+};
+
+} // namespace cpu
+} // namespace vesin
+} // namespace metatensor
+} // namespace PLMD
+
+#endif
+
+using namespace PLMD::metatensor::vesin;
+using namespace PLMD::metatensor::vesin::cpu;
+
+void PLMD::metatensor::vesin::cpu::neighbors(
+    const Vector* points,
+    size_t n_points,
+    BoundingBox cell,
+    VesinOptions options,
+    VesinNeighborList& raw_neighbors
+) {
+    auto cell_list = CellList(cell, options.cutoff);
+
+    for (size_t i=0; i<n_points; i++) {
+        cell_list.add_point(i, points[i]);
+    }
+
+    auto cell_matrix = cell.matrix();
+    auto cutoff2 = options.cutoff * options.cutoff;
+
+    // the cell list creates too many candidate pairs, we only keep the ones
+    // where the distance is actually below the cutoff
+    auto neighbors = GrowableNeighborList{raw_neighbors, raw_neighbors.length, options};
+    neighbors.reset();
+
+    cell_list.foreach_pair([&](size_t first, size_t second, CellShift shift) {
+        if (!options.full) {
+            // for a half neighbor list, only keep pairs where the first
+            // index is smaller than (or equal to) the second one
+            if (first > second) {
+                return;
+            }
+
+            if (first == second) {
+                // When creating pairs between a point and one of its periodic
+                // images, the code generates multiple redundant pairs (e.g. with
+                // shifts 0 1 1 and 0 -1 -1); and we want to only keep one of
+                // these.
+                if (shift[0] + shift[1] + shift[2] < 0) {
+                    // drop shifts on the negative half-space
+                    return;
+                }
+
+                if ((shift[0] + shift[1] + shift[2] == 0)
+                    && (shift[2] < 0 || (shift[2] == 0 && shift[1] < 0))) {
+                    // drop shifts in the negative half plane or the negative
+                    // shift[1] axis. See below for a graphical representation:
+                    // we are keeping the shifts indicated with `O` and dropping
+                    // the ones indicated with `X`
+                    //
+                    //  O O O │ O O O
+                    //  O O O │ O O O
+                    //  O O O │ O O O
+                    // ─X─X─X─┼─O─O─O─
+                    //  X X X │ X X X
+                    //  X X X │ X X X
+                    //  X X X │ X X X
+                    return;
+                }
+            }
+        }
+
+        auto vector = points[second] - points[first] + shift.cartesian(cell_matrix);
+        auto distance2 = vector.dot(vector);
+
+        if (distance2 < cutoff2) {
+            auto index = neighbors.length();
+            neighbors.set_pair(index, first, second);
+
+            if (options.return_shifts) {
+                neighbors.set_shift(index, shift);
+            }
+
+            if (options.return_distances) {
+                neighbors.set_distance(index, std::sqrt(distance2));
+            }
+
+            if (options.return_vectors) {
+                neighbors.set_vector(index, vector);
+            }
+
+            neighbors.increment_length();
+        }
+    });
+}
+
+/* ========================================================================== */
+
+/// Maximal number of cells, we need to use this to prevent having too many
+/// cells with a small bounding box and a large cutoff
+#define MAX_NUMBER_OF_CELLS 1e5
+
+
+/// Function to compute both quotient and remainder of the division of a by b.
+/// This function follows Python convention, making sure the remainder has the
+/// same sign as `b`.
+static std::tuple<int32_t, int32_t> divmod(int32_t a, size_t b) {
+    assert(b < static_cast<size_t>(std::numeric_limits<int32_t>::max()));
+    auto b_32 = static_cast<int32_t>(b);
+    auto quotient = a / b_32;
+    auto remainder = a % b_32;
+    if (remainder < 0) {
+        remainder += b_32;
+        quotient -= 1;
+    }
+    return std::make_tuple(quotient, remainder);
+}
+
+/// Apply the `divmod` function to three components at a time
+static std::tuple<std::array<int32_t, 3>, std::array<int32_t, 3>>
+divmod(std::array<int32_t, 3> a, std::array<size_t, 3> b) {
+    auto [qx, rx] = divmod(a[0], b[0]);
+    auto [qy, ry] = divmod(a[1], b[1]);
+    auto [qz, rz] = divmod(a[2], b[2]);
+    return std::make_tuple(
+        std::array<int32_t, 3>{qx, qy, qz},
+        std::array<int32_t, 3>{rx, ry, rz}
+    );
+}
+
+CellList::CellList(BoundingBox box, double cutoff):
+    n_search_({0, 0, 0}),
+    cells_shape_({0, 0, 0}),
+    box_(box)
+{
+    auto distances_between_faces = box_.distances_between_faces();
+
+    auto n_cells = Vector{
+        std::clamp(std::trunc(distances_between_faces[0] / cutoff), 1.0, HUGE_VAL),
+        std::clamp(std::trunc(distances_between_faces[1] / cutoff), 1.0, HUGE_VAL),
+        std::clamp(std::trunc(distances_between_faces[2] / cutoff), 1.0, HUGE_VAL),
+    };
+
+    assert(std::isfinite(n_cells[0]) && std::isfinite(n_cells[1]) && std::isfinite(n_cells[2]));
+
+    // limit memory consumption by ensuring we have less than
+    // `MAX_NUMBER_OF_CELLS` cells to look through
+    auto n_cells_total = n_cells[0] * n_cells[1] * n_cells[2];
+    if (n_cells_total > MAX_NUMBER_OF_CELLS) {
+        // set the total number of cells close to MAX_NUMBER_OF_CELLS, while
+        // keeping roughly the ratio of cells in each direction
+        auto ratio_x_y = n_cells[0] / n_cells[1];
+        auto ratio_y_z = n_cells[1] / n_cells[2];
+
+        n_cells[2] = std::trunc(std::cbrt(MAX_NUMBER_OF_CELLS / (ratio_x_y * ratio_y_z * ratio_y_z)));
+        n_cells[1] = std::trunc(ratio_y_z * n_cells[2]);
+        n_cells[0] = std::trunc(ratio_x_y * n_cells[1]);
+    }
+
+    // number of cells to search in each direction to make sure all possible
+    // pairs below the cutoff are accounted for.
+    this->n_search_ = std::array<int32_t, 3>{
+        static_cast<int32_t>(std::ceil(cutoff * n_cells[0] / distances_between_faces[0])),
+        static_cast<int32_t>(std::ceil(cutoff * n_cells[1] / distances_between_faces[1])),
+        static_cast<int32_t>(std::ceil(cutoff * n_cells[2] / distances_between_faces[2])),
+    };
+
+    this->cells_shape_ = std::array<size_t, 3>{
+        static_cast<size_t>(n_cells[0]),
+        static_cast<size_t>(n_cells[1]),
+        static_cast<size_t>(n_cells[2]),
+    };
+
+    for (size_t spatial=0; spatial<3; spatial++) {
+        if (n_search_[spatial] < 1) {
+            n_search_[spatial] = 1;
+        }
+
+        // don't look for neighboring cells if we have only one cell and no
+        // periodic boundary condition
+        if (n_cells[spatial] == 1 && !box.periodic()) {
+            n_search_[spatial] = 0;
+        }
+    }
+
+    this->cells_.resize(cells_shape_[0] * cells_shape_[1] * cells_shape_[2]);
+}
+
+void CellList::add_point(size_t index, Vector position) {
+    auto fractional = box_.cartesian_to_fractional(position);
+
+    // find the cell in which this atom should go
+    auto cell_index = std::array<int32_t, 3>{
+        static_cast<int32_t>(std::floor(fractional[0] * static_cast<double>(cells_shape_[0]))),
+        static_cast<int32_t>(std::floor(fractional[1] * static_cast<double>(cells_shape_[1]))),
+        static_cast<int32_t>(std::floor(fractional[2] * static_cast<double>(cells_shape_[2]))),
+    };
+
+    // deal with pbc by wrapping the atom inside if it was outside of the
+    // cell
+    CellShift shift;
+    // auto (shift, cell_index) =
+    if (box_.periodic()) {
+        auto result = divmod(cell_index, cells_shape_);
+        shift = CellShift{std::get<0>(result)};
+        cell_index = std::get<1>(result);
+    } else {
+        shift = CellShift({0, 0, 0});
+        cell_index = std::array<int32_t, 3>{
+            std::clamp(cell_index[0], 0, static_cast<int32_t>(cells_shape_[0] - 1)),
+            std::clamp(cell_index[1], 0, static_cast<int32_t>(cells_shape_[1] - 1)),
+            std::clamp(cell_index[2], 0, static_cast<int32_t>(cells_shape_[2] - 1)),
+        };
+    }
+
+    this->get_cell(cell_index).emplace_back(Point{index, shift});
+}
+
+
+template <typename Function>
+void CellList::foreach_pair(Function callback) {
+    for (int32_t cell_i_x=0; cell_i_x<static_cast<int32_t>(cells_shape_[0]); cell_i_x++) {
+    for (int32_t cell_i_y=0; cell_i_y<static_cast<int32_t>(cells_shape_[1]); cell_i_y++) {
+    for (int32_t cell_i_z=0; cell_i_z<static_cast<int32_t>(cells_shape_[2]); cell_i_z++) {
+        const auto& current_cell = this->get_cell({cell_i_x, cell_i_y, cell_i_z});
+        // look through each neighboring cell
+        for (int32_t delta_x=-n_search_[0]; delta_x<=n_search_[0]; delta_x++) {
+        for (int32_t delta_y=-n_search_[1]; delta_y<=n_search_[1]; delta_y++) {
+        for (int32_t delta_z=-n_search_[2]; delta_z<=n_search_[2]; delta_z++) {
+            auto cell_i = std::array<int32_t, 3>{
+                cell_i_x + delta_x,
+                cell_i_y + delta_y,
+                cell_i_z + delta_z,
+            };
+
+            // shift vector from one cell to the other and index of
+            // the neighboring cell
+            auto [cell_shift, neighbor_cell_i] = divmod(cell_i, cells_shape_);
+
+            for (const auto& atom_i: current_cell) {
+                for (const auto& atom_j: this->get_cell(neighbor_cell_i)) {
+                    auto shift = CellShift{cell_shift} + atom_i.shift - atom_j.shift;
+                    auto shift_is_zero = shift[0] == 0 && shift[1] == 0 && shift[2] == 0;
+
+                    if (!box_.periodic() && !shift_is_zero) {
+                        // do not create pairs crossing the periodic
+                        // boundaries in a non-periodic box
+                        continue;
+                    }
+
+                    if (atom_i.index == atom_j.index && shift_is_zero) {
+                        // only create pairs with the same atom twice if the
+                        // pair spans more than one bounding box
+                        continue;
+                    }
+
+                    callback(atom_i.index, atom_j.index, shift);
+                }
+            } // loop over atoms in current neighbor cells
+        }}}
+    }}} // loop over neighboring cells
+}
+
+CellList::Cell& CellList::get_cell(std::array<int32_t, 3> index) {
+    size_t linear_index = (cells_shape_[0] * cells_shape_[1] * index[2])
+                        + (cells_shape_[0] * index[1])
+                        + index[0];
+    return cells_[linear_index];
+}
+
+/* ========================================================================== */
+
+
+void GrowableNeighborList::set_pair(size_t index, size_t first, size_t second) {
+    if (index >= this->capacity) {
+        this->grow();
+    }
+
+    this->neighbors.pairs[index][0] = first;
+    this->neighbors.pairs[index][1] = second;
+}
+
+void GrowableNeighborList::set_shift(size_t index, PLMD::metatensor::vesin::CellShift shift) {
+    if (index >= this->capacity) {
+        this->grow();
+    }
+
+    this->neighbors.shifts[index][0] = shift[0];
+    this->neighbors.shifts[index][1] = shift[1];
+    this->neighbors.shifts[index][2] = shift[2];
+}
+
+void GrowableNeighborList::set_distance(size_t index, double distance) {
+    if (index >= this->capacity) {
+        this->grow();
+    }
+
+    this->neighbors.distances[index] = distance;
+}
+
+void GrowableNeighborList::set_vector(size_t index, PLMD::metatensor::vesin::Vector vector) {
+    if (index >= this->capacity) {
+        this->grow();
+    }
+
+    this->neighbors.vectors[index][0] = vector[0];
+    this->neighbors.vectors[index][1] = vector[1];
+    this->neighbors.vectors[index][2] = vector[2];
+}
+
+template <typename scalar_t, size_t N>
+static scalar_t (*alloc(scalar_t (*ptr)[N], size_t size, size_t new_size))[N] {
+    auto* new_ptr = reinterpret_cast<scalar_t(*)[N]>(std::realloc(ptr, new_size * sizeof(scalar_t[N])));
+
+    if (new_ptr == nullptr) {
+        throw std::bad_alloc();
+    }
+
+    // initialize with a bit pattern that maps to NaN for double
+    std::memset(new_ptr + size, 0b11111111, (new_size - size) * sizeof(scalar_t[N]));
+
+    return new_ptr;
+}
+
+template <typename scalar_t>
+static scalar_t* alloc(scalar_t* ptr, size_t size, size_t new_size) {
+    auto* new_ptr = reinterpret_cast<scalar_t*>(std::realloc(ptr, new_size * sizeof(scalar_t)));
+
+    if (new_ptr == nullptr) {
+        throw std::bad_alloc();
+    }
+
+    // initialize with a bit pattern that maps to NaN for double
+    std::memset(new_ptr + size, 0b11111111, (new_size - size) * sizeof(scalar_t));
+
+    return new_ptr;
+}
+
+void GrowableNeighborList::grow() {
+    auto new_size = neighbors.length * 2;
+    if (new_size == 0) {
+        new_size = 1;
+    }
+
+    auto* new_pairs = alloc(neighbors.pairs, neighbors.length, new_size);
+
+    int32_t (*new_shifts)[3] = nullptr;
+    if (options.return_shifts) {
+        new_shifts = alloc(neighbors.shifts, neighbors.length, new_size);
+    }
+
+    double *new_distances = nullptr;
+    if (options.return_distances) {
+        new_distances = alloc(neighbors.distances, neighbors.length, new_size);
+    }
+
+    double (*new_vectors)[3] = nullptr;
+    if (options.return_vectors) {
+        new_vectors = alloc(neighbors.vectors, neighbors.length, new_size);
+    }
+
+    this->neighbors.pairs = new_pairs;
+    this->neighbors.shifts = new_shifts;
+    this->neighbors.distances = new_distances;
+    this->neighbors.vectors = new_vectors;
+
+    this->capacity = new_size;
+}
+
+void GrowableNeighborList::reset() {
+    // set all allocated data to zero
+    auto size = this->neighbors.length;
+    std::memset(this->neighbors.pairs, 0, size * sizeof(size_t[2]));
+
+    if (this->neighbors.shifts != nullptr) {
+        std::memset(this->neighbors.shifts, 0, size * sizeof(int32_t[3]));
+    }
+
+    if (this->neighbors.distances != nullptr) {
+        std::memset(this->neighbors.distances, 0, size * sizeof(double));
+    }
+
+    if (this->neighbors.vectors != nullptr) {
+        std::memset(this->neighbors.vectors, 0, size * sizeof(double[3]));
+    }
+
+    // reset length (but keep the capacity where it's at)
+    this->neighbors.length = 0;
+
+    // allocate/deallocate pointers as required
+    auto* shifts = this->neighbors.shifts;
+    if (this->options.return_shifts && shifts == nullptr) {
+        shifts = alloc(shifts, 0, capacity);
+    } else if (!this->options.return_shifts && shifts != nullptr) {
+        std::free(shifts);
+        shifts = nullptr;
+    }
+
+    auto* distances = this->neighbors.distances;
+    if (this->options.return_distances && distances == nullptr) {
+        distances = alloc(distances, 0, capacity);
+    } else if (!this->options.return_distances && distances != nullptr) {
+        std::free(distances);
+        distances = nullptr;
+    }
+
+    auto* vectors = this->neighbors.vectors;
+    if (this->options.return_vectors && vectors == nullptr) {
+        vectors = alloc(vectors, 0, capacity);
+    } else if (!this->options.return_vectors && vectors != nullptr) {
+        std::free(vectors);
+        vectors = nullptr;
+    }
+
+    this->neighbors.shifts = shifts;
+    this->neighbors.distances = distances;
+    this->neighbors.vectors = vectors;
+}
+
+
+void PLMD::metatensor::vesin::cpu::free_neighbors(VesinNeighborList& neighbors) {
+    assert(neighbors.device == VesinCPU);
+
+    std::free(neighbors.pairs);
+    std::free(neighbors.shifts);
+    std::free(neighbors.vectors);
+    std::free(neighbors.distances);
+}
+#include <cstring>
+#include <string>
+
+
+
+thread_local std::string LAST_ERROR;
+
+extern "C" int vesin_neighbors(
+    const double (*points)[3],
+    size_t n_points,
+    const double box[3][3],
+    bool periodic,
+    VesinDevice device,
+    VesinOptions options,
+    VesinNeighborList* neighbors,
+    const char** error_message
+) {
+    if (error_message == nullptr) {
+        return EXIT_FAILURE;
+    }
+
+    if (points == nullptr) {
+        *error_message = "`points` can not be a NULL pointer";
+        return EXIT_FAILURE;
+    }
+
+    if (box == nullptr) {
+        *error_message = "`box` can not be a NULL pointer";
+        return EXIT_FAILURE;
+    }
+
+    if (neighbors == nullptr) {
+        *error_message = "`neighbors` can not be a NULL pointer";
+        return EXIT_FAILURE;
+    }
+
+    if (neighbors->device != VesinUnknownDevice && neighbors->device != device) {
+        *error_message = "`neighbors` device and data `device` do not match, free the neighbors first";
+        return EXIT_FAILURE;
+    }
+
+    if (device == VesinUnknownDevice) {
+        *error_message = "got an unknown device to use when running simulation";
+        return EXIT_FAILURE;
+    }
+
+    if (neighbors->device == VesinUnknownDevice) {
+        // initialize the device
+        neighbors->device = device;
+    } else if (neighbors->device != device) {
+        *error_message = "`neighbors.device` and `device` do not match, free the neighbors first";
+        return EXIT_FAILURE;
+    }
+
+    try {
+        if (device == VesinCPU) {
+            auto matrix = PLMD::metatensor::vesin::Matrix{{{
+                {{box[0][0], box[0][1], box[0][2]}},
+                {{box[1][0], box[1][1], box[1][2]}},
+                {{box[2][0], box[2][1], box[2][2]}},
+            }}};
+
+            PLMD::metatensor::vesin::cpu::neighbors(
+                reinterpret_cast<const Vector*>(points),
+                n_points,
+                PLMD::metatensor::vesin::BoundingBox(matrix, periodic),
+                options,
+                *neighbors
+            );
+        } else {
+            throw std::runtime_error("unknown device " + std::to_string(device));
+        }
+    } catch (const std::bad_alloc&) {
+        LAST_ERROR = "failed to allocate memory";
+        *error_message = LAST_ERROR.c_str();
+        return EXIT_FAILURE;
+    } catch (const std::exception& e) {
+        LAST_ERROR = e.what();
+        *error_message = LAST_ERROR.c_str();
+        return EXIT_FAILURE;
+    } catch (...) {
+        *error_message = "fatal error: unknown type thrown as exception";
+        return EXIT_FAILURE;
+    }
+
+    return EXIT_SUCCESS;
+}
+
+
+extern "C" void vesin_free(VesinNeighborList* neighbors) {
+    if (neighbors == nullptr) {
+        return;
+    }
+
+    if (neighbors->device == VesinUnknownDevice) {
+        // nothing to do
+    } else if (neighbors->device == VesinCPU) {
+        PLMD::metatensor::vesin::cpu::free_neighbors(*neighbors);
+    }
+
+    std::memset(neighbors, 0, sizeof(VesinNeighborList));
+}
diff --git a/src/metatensor/vesin.h b/src/metatensor/vesin.h
new file mode 100644
index 0000000000..e02231d4ce
--- /dev/null
+++ b/src/metatensor/vesin.h
@@ -0,0 +1,186 @@
+/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Copyright (c) 2024 The METATENSOR code team
+(see the PEOPLE-METATENSOR file at the root of this folder for a list of names)
+
+See https://docs.metatensor.org/latest/ for more information about the
+metatensor package that this module allows you to call from PLUMED.
+
+This file is part of the METATENSOR-PLUMED module.
+
+The METATENSOR-PLUMED module is free software: you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The METATENSOR-PLUMED module is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the METATENSOR-PLUMED module. If not, see <https://www.gnu.org/licenses/>.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+#ifndef __PLUMED_metatensor_vesin_h
+#define __PLUMED_metatensor_vesin_h
+/*INDENT-OFF*/
+
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(VESIN_SHARED)
+    #if defined(VESIN_EXPORTS)
+        #if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__)
+            #define VESIN_API __attribute__((visibility("default")))
+        #elif defined(_MSC_VER)
+            #define VESIN_API __declspec(dllexport)
+        #else
+            #define VESIN_API
+        #endif
+    #else
+        #if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__)
+            #define VESIN_API __attribute__((visibility("default")))
+        #elif defined(_MSC_VER)
+            #define VESIN_API __declspec(dllimport)
+        #else
+            #define VESIN_API
+        #endif
+    #endif
+#else
+    #define VESIN_API
+#endif
+
+#ifdef __cplusplus
+namespace PLMD {
+namespace metatensor {
+namespace vesin {
+extern "C" {
+#endif
+
+/// Options for a neighbor list calculation
+struct VesinOptions {
+    /// Spherical cutoff, only pairs below this cutoff will be included
+    double cutoff;
+    /// Should the returned neighbor list be a full list (include both `i -> j`
+    /// and `j -> i` pairs) or a half list (include only `i -> j`)?
+    bool full;
+    // TODO: sort option?
+
+    /// Should the returned `VesinNeighborList` contain `shifts`?
+    bool return_shifts;
+    /// Should the returned `VesinNeighborList` contain `distances`?
+    bool return_distances;
+    /// Should the returned `VesinNeighborList` contain `vectors`?
+    bool return_vectors;
+};
+
+/// Device on which the data can be allocated
+enum VesinDevice {
+    /// Unknown device, used for default initialization and to indicate no
+    /// allocated data.
+    VesinUnknownDevice = 0,
+    /// CPU device
+    VesinCPU = 1,
+};
+
+
+/// The actual neighbor list
+///
+/// This is organized as a list of pairs, where each pair can contain the
+/// following data:
+///
+/// - indices of the points in the pair;
+/// - distance between points in the pair, accounting for periodic boundary
+///   conditions;
+/// - vector between points in the pair, accounting for periodic boundary
+///   conditions;
+/// - periodic shift that created the pair. This is only relevant when using
+///   periodic boundary conditions, and contains the number of bounding boxes
+///   we need to cross to create the pair. If the positions of the points are
+///   `r_i` and `r_j`, the bounding box is described by a matrix of three
+///   vectors `H`, and the periodic shift is `S`, the distance vector for a
+///   given pair will be given by `r_ij = r_j - r_i + S @ H`.
+///
+/// Under periodic boundary conditions, two atoms can be part of multiple pairs,
+/// each pair having a different periodic shift.
+struct VESIN_API VesinNeighborList {
+#ifdef __cplusplus
+    VesinNeighborList():
+        length(0),
+        device(VesinUnknownDevice),
+        pairs(nullptr),
+        shifts(nullptr),
+        distances(nullptr),
+        vectors(nullptr)
+    {}
+#endif
+
+    /// Number of pairs in this neighbor list
+    size_t length;
+    /// Device used for the data allocations
+    VesinDevice device;
+    /// Array of pairs (storing the indices of the first and second point in the
+    /// pair), containing `length` elements.
+    size_t (*pairs)[2];
+    /// Array of box shifts, one for each `pair`. This is only set if
+    /// `options.return_shifts` was `true` during the calculation.
+    int32_t (*shifts)[3];
+    /// Array of pair distances (i.e. distance between the two points), one for
+    /// each pair. This is only set if `options.return_distances` was `true`
+    /// during the calculation.
+    double *distances;
+    /// Array of pair vectors (i.e. vector between the two points), one for
+    /// each pair. This is only set if `options.return_vectors` was `true`
+    /// during the calculation.
+    double (*vectors)[3];
+
+    // TODO: custom memory allocators?
+};
+
+/// Free all allocated memory inside a `VesinNeighborList`, according to its
+/// `device`.
+void VESIN_API vesin_free(struct VesinNeighborList* neighbors);
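+
+/* A minimal usage sketch (illustrative only, not part of the API): compute a
+   half neighbor list for three points in a cubic box, then release the memory.
+   The numeric values are arbitrary, and error handling is reduced to the bare
+   minimum.
+
+       double points[3][3] = {{0, 0, 0}, {0, 1.3, 1.3}, {1.3, 1.3, 0}};
+       double box[3][3] = {{3.2, 0, 0}, {0, 3.2, 0}, {0, 0, 3.2}};
+
+       struct VesinOptions options;
+       options.cutoff = 2.0;
+       options.full = false;
+       options.return_shifts = true;
+       options.return_distances = true;
+       options.return_vectors = false;
+
+       // zero-initialize the output for the first call
+       struct VesinNeighborList neighbors;
+       memset(&neighbors, 0, sizeof(neighbors));
+
+       const char* error_message = NULL;
+       int status = vesin_neighbors(
+           points, 3, box, true, VesinCPU, options, &neighbors, &error_message
+       );
+       // on success, iterate over the neighbors.length entries of neighbors.pairs
+       vesin_free(&neighbors);
+*/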
+/// Compute a neighbor list.
+///
+/// The data is returned in a `VesinNeighborList`. For an initial call, the
+/// `VesinNeighborList` should be zero-initialized (or default-initialized in
+/// C++). The `VesinNeighborList` can be re-used across calls to this function
+/// to re-use memory allocations, and once it is no longer needed, users should
+/// call `vesin_free` to release the corresponding memory.
+///
+/// @param points positions of all points in the system;
+/// @param n_points number of elements in the `points` array
+/// @param box bounding box for the system. If the system is non-periodic,
+///     this is ignored. This should contain the three vectors of the bounding
+///     box, one vector per row of the matrix.
+/// @param periodic is the system using periodic boundary conditions?
+/// @param device device where the `points` and `box` data is allocated.
+/// @param options options for the calculation
+/// @param neighbors non-NULL pointer to `VesinNeighborList` that will be used
+///     to store the computed list of neighbors.
+/// @param error_message Pointer to a `char*` that will be set to the error
+///     message if this function fails. This does not need to be freed when no
+///     longer needed.
+int VESIN_API vesin_neighbors(
+    const double (*points)[3],
+    size_t n_points,
+    const double box[3][3],
+    bool periodic,
+    VesinDevice device,
+    struct VesinOptions options,
+    struct VesinNeighborList* neighbors,
+    const char** error_message
+);
+
+
+#ifdef __cplusplus
+
+} // extern "C"
+} // namespace vesin
+} // namespace metatensor
+} // namespace PLMD
+
+#endif
+
+#endif
diff --git a/user-doc/METATENSORMOD.md b/user-doc/METATENSORMOD.md
new file mode 100644
index 0000000000..08c4944dec
--- /dev/null
+++ b/user-doc/METATENSORMOD.md
@@ -0,0 +1,161 @@
+\page METATENSORMOD Metatensor
+
+
+
+# Overview
+
+This module implements the interface between PLUMED and [metatensor], allowing
+you to use arbitrary machine learning models as collective variables. These
+machine learning models are defined using custom Python code — following the
+[metatensor atomistic models][mts_models] interface — and then exported to
+[TorchScript]. The exported model is then loaded inside PLUMED and executed
+during the simulation.
+
+
+# Installation
+
+This module requires two main dependencies: the C++ torch library (i.e.
+`libtorch`) and the C++ metatensor_torch library. There are multiple ways of
+installing both libraries, which are discussed below.
+
+## Installing the libraries through Python's package manager (`pip`)
+
+The easiest way to get all dependencies on your system is to download the
+pre-built Python wheels with `pip`. This is the same set of wheels you will need
+to define custom models.
+
+```bash
+pip install "metatensor-torch ==0.5.1"  # change this version to get newer releases
+
+# optional: get the other metatensor tools to define models (these are only
+# usable from Python).
+pip install metatensor-operations metatensor-learn
+
+# export the location of all the libraries:
+TORCH_CMAKE_PREFIX=$(python -c "import torch; print(torch.utils.cmake_prefix_path)")
+TORCH_PREFIX=$(cd "$TORCH_CMAKE_PREFIX/../.." && pwd)
+
+METATENSOR_CMAKE_PREFIX=$(python -c "import metatensor; print(metatensor.utils.cmake_prefix_path)")
+METATENSOR_PREFIX=$(cd "$METATENSOR_CMAKE_PREFIX/../.." && pwd)
+
+METATENSOR_TORCH_CMAKE_PREFIX=$(python -c "import metatensor.torch; print(metatensor.torch.utils.cmake_prefix_path)")
+METATENSOR_TORCH_PREFIX=$(cd "$METATENSOR_TORCH_CMAKE_PREFIX/../.." && pwd)
+
+# The torch library installed by pip uses a pre-cxx11 ABI
+TORCH_CPPFLAGS="-D_GLIBCXX_USE_CXX11_ABI=0"
+```
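+
+If you are unsure which versions of the libraries ended up in your environment,
+a quick sanity check is to ask `pip` directly:
+
+```bash
+# print name, version, and install location of the relevant packages
+pip show torch metatensor-torch
+```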
+
+That's it, you can now jump to [the last part](#building-plumed-with-metatensor)
+of the installation instructions.
+
+## Using pre-built libraries
+
+If you only want to use existing models, you can download pre-built versions of
+the libraries and build PLUMED against these. First, you'll need to download
+libtorch (see also \ref installation-libtorch for other instructions on
+installing a pre-built libtorch):
+
+```bash
+# Download torch 2.2.2 for x86_64 (Intel) Linux.
+#
+# Variations of this link for other operating systems (macOS, Windows), CPU
+# architectures (Apple Silicon, arm64), CUDA versions, and newer versions of
+# libtorch can be found at https://pytorch.org/get-started/locally/
+
+wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.2.2%2Bcpu.zip
+unzip libtorch-cxx11-abi-shared-with-deps-2.2.2+cpu.zip
+
+# alternatively, if you have a CUDA-enabled GPU, you can use the corresponding
+# pre-built library (here for CUDA 12.1):
+wget https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.2.2%2Bcu121.zip
+unzip libtorch-cxx11-abi-shared-with-deps-2.2.2+cu121.zip
+
+# Make the location of libtorch visible
+TORCH_PREFIX=$(pwd)/libtorch
+
+# if you are using a library with pre-cxx11 ABI, you need an extra flag:
+TORCH_CPPFLAGS="-D_GLIBCXX_USE_CXX11_ABI=0"
+```
+
+Once you acquire libtorch, it is time to build metatensor and metatensor_torch
+from sources. There is currently no standalone pre-built library for these
+(although you can use the pre-built version that comes with `pip`). For this,
+you'll need a Rust compiler on your system, which you can get with
+[rustup](https://rustup.rs/) or any other method of your convenience.
+
+```bash
+# patch a bug in torch's MKL detection in CMake, from the PLUMED sources
+cd <path/to/the/plumed/sources>
+./src/metatensor/patch-torch.sh "$TORCH_PREFIX"
+
+cd <path/where/to/build/metatensor>
+
+# define a location where metatensor should be installed
+METATENSOR_PREFIX=<...>
+
+METATENSOR_TORCH_PREFIX="$METATENSOR_PREFIX"
+
+git clone https://github.com/lab-cosmo/metatensor
+cd metatensor
+# checkout this version of metatensor-torch, or a more recent release
+git checkout metatensor-torch-v0.5.0
+
+mkdir build && cd build
+cmake -DBUILD_SHARED_LIBS=ON \
+    -DCMAKE_INSTALL_PREFIX="$METATENSOR_PREFIX" \
+    -DCMAKE_PREFIX_PATH="$TORCH_PREFIX" \
+    -DBUILD_METATENSOR_TORCH=ON \
+    -DMETATENSOR_INSTALL_BOTH_STATIC_SHARED=OFF \
+    ..
+
+cmake --build . --target install --parallel
+```
+
+## Building PLUMED with metatensor
+
+Once you have installed all dependencies with one of the methods above, you can
+configure PLUMED:
+
+```bash
+# set the include search path for the compilers
+TORCH_INCLUDES="-I$TORCH_PREFIX/include -I$TORCH_PREFIX/include/torch/csrc/api/include"
+CPPFLAGS="$TORCH_INCLUDES $TORCH_CPPFLAGS -I$METATENSOR_PREFIX/include -I$METATENSOR_TORCH_PREFIX/include $CPPFLAGS"
+
+# set the library search path for the linker
+LDFLAGS="-L$TORCH_PREFIX/lib -L$METATENSOR_PREFIX/lib -L$METATENSOR_TORCH_PREFIX/lib $LDFLAGS"
+
+# set the rpath to make sure the plumed executable will be able to find the right libraries
+LDFLAGS="$LDFLAGS -Wl,-rpath,$TORCH_PREFIX/lib"
+LDFLAGS="$LDFLAGS -Wl,-rpath,$METATENSOR_PREFIX/lib -Wl,-rpath,$METATENSOR_TORCH_PREFIX/lib"
+
+# If you are running on Linux, force the use of rpath instead of runpath
+# (we rely on the rpath to find dependencies of dependencies)
+LDFLAGS="$LDFLAGS -Wl,--disable-new-dtags"
+
+# configure PLUMED
+./configure --enable-libtorch --enable-metatensor --enable-modules=+metatensor \
+    LDFLAGS="$LDFLAGS" CPPFLAGS="$CPPFLAGS"
+```
+
+Pay close attention to the output: it should contain **both** a line about
+`checking libtorch` and a line about `checking metatensor`, both ending with
+`...yes`. If this is not the case, you'll get a warning about `cannot enable
+__PLUMED_HAS_LIBTORCH` or `cannot enable __PLUMED_HAS_METATENSOR`. If you get
+any of these warnings, you should check `config.log` to know more about what's
+going on and why these libraries can't be found.
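+
+After a successful configuration, PLUMED is compiled and installed in the usual
+way; a typical sequence is sketched below (adjust the number of parallel jobs to
+your machine):
+
+```bash
+make -j 4
+make install
+```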
+
+# Module Contents
+
+This module defines the following actions:
+
+@METATENSORMOD_COLVAR@
+
+
+
+[TorchScript]: https://pytorch.org/docs/stable/jit.html
+[metatensor]: https://docs.metatensor.org/latest/index.html
+[mts_models]: https://docs.metatensor.org/latest/atomistic/index.html
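+
+As an illustration, a minimal input using the action defined by this module
+could look like the sketch below. The model file `model.pt`, the atom ranges,
+and the species-to-types mapping are hypothetical and must be adapted to your
+system and model; the keywords themselves are the ones registered by the
+METATENSOR action:
+
+```
+cv: METATENSOR ...
+    MODEL=model.pt
+    SPECIES1=1-26
+    SPECIES2=27-62
+    SPECIES_TO_TYPES=6,8
+...
+
+PRINT ARG=cv FILE=colvar STRIDE=1
+```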