diff --git a/README.md b/README.md
index f564c867..f0ff4bc9 100644
--- a/README.md
+++ b/README.md
@@ -147,7 +147,8 @@ await pde(..., platform="nvidia-acc")
 ```
 
 The Devito library uses OpenACC to generate GPU code. The recommended way to access the necessary 
-compilers is to install the [NVIDIA HPC SDK](https://developer.nvidia.com/nvidia-hpc-sdk-downloads).
+compilers is to install the [NVIDIA HPC SDK](https://developer.nvidia.com/nvidia-hpc-sdk-downloads) **before** creating
+the Stride environment.
 
 ```sh
 wget https://developer.download.nvidia.com/hpc-sdk/22.11/nvhpc_2022_2211_Linux_x86_64_cuda_multi.tar.gz
@@ -158,13 +159,18 @@ sudo ./install
 
 During the installation, select the ``single system install`` option.
 
-Once the installation is done, you can add the following lines to your ``~/.bashrc``:
+Once the installation is done, add the following lines to your ``~/.bashrc``:
 
 ```sh
-export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/22.11/compilers/bin/:$PATH
-export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/22.11/compilers/lib/:$LD_LIBRARY_PATH
-export PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/22.11/comm_libs/mpi/bin/:$PATH
-export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/22.11/comm_libs/mpi/lib/:$LD_LIBRARY_PATH
+export HPCSDK_HOME=/opt/nvidia/hpc_sdk/Linux_x86_64/22.11
+export CUDA_HOME=$HPCSDK_HOME/cuda
+export CUDA_ROOT=$HPCSDK_HOME/cuda/bin
+export PATH=$HPCSDK_HOME/compilers/bin/:$PATH
+export LD_LIBRARY_PATH=$HPCSDK_HOME/compilers/lib/:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=$HPCSDK_HOME/cuda/lib/:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=$HPCSDK_HOME/cuda/lib64/:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=$HPCSDK_HOME/math_libs/lib64/:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=$HPCSDK_HOME/cuda/11.8/extras/CUPTI/lib64/:$LD_LIBRARY_PATH
 ```
 
 ## Citing Stride
diff --git a/stride/optimisation/loss/functional.py b/stride/optimisation/loss/functional.py
index ee8c43e0..b7723a5b 100644
--- a/stride/optimisation/loss/functional.py
+++ b/stride/optimisation/loss/functional.py
@@ -26,8 +26,8 @@ def __init__(self, fun_value, shot_id, residuals=None, **kwargs):
 
         self.shot_id = shot_id
         self.fun_value = fun_value
-        # TODO Residuals use space that we don't always need
-        # self.residuals = residuals
+        if kwargs.pop('keep_residual', False):
+            self.residuals = residuals
 
     def __repr__(self):
         return 'loss %e for shot %d' % (self.fun_value, self.shot_id)
diff --git a/stride/optimisation/loss/l2_distance.py b/stride/optimisation/loss/l2_distance.py
index 19043b8a..01ab0988 100644
--- a/stride/optimisation/loss/l2_distance.py
+++ b/stride/optimisation/loss/l2_distance.py
@@ -34,7 +34,7 @@ async def forward(self, modelled, observed, **kwargs):
         self.residual = residual
 
         fun_data = 0.5 * np.sum(residual.data ** 2)
-        fun = FunctionalValue(fun_data, shot_id, residual)
+        fun = FunctionalValue(fun_data, shot_id, residual, **kwargs)
 
         return fun