From faa67170f626fa76c235fa4475c6dc7f9533866f Mon Sep 17 00:00:00 2001 From: Johannes Mueller Date: Tue, 9 Jan 2024 12:04:41 +0100 Subject: [PATCH] Create docs-gh-pages.yml (#35) Signed-off-by: Johannes Mueller --- .github/workflows/docs-gh-pages.yml | 53 +++++++++++++ .../{python-package.yml => pytest.yml} | 4 +- src/torchphysics/models/activation_fn.py | 16 ++-- .../models/deeponet/branchnets.py | 74 ++++++++++--------- src/torchphysics/models/deeponet/deeponet.py | 38 +++++----- src/torchphysics/models/deeponet/layers.py | 4 +- src/torchphysics/models/deeponet/trunknets.py | 36 +++++---- src/torchphysics/models/deepritz.py | 8 +- src/torchphysics/models/fcn.py | 30 ++++---- src/torchphysics/models/model.py | 16 ++-- src/torchphysics/models/qres.py | 30 ++++---- .../problem/conditions/condition.py | 16 ++-- .../problem/conditions/deeponet_condition.py | 28 +++---- .../problem/samplers/random_samplers.py | 30 ++++---- src/torchphysics/solver.py | 10 ++- .../utils/differentialoperators.py | 26 +++---- 16 files changed, 241 insertions(+), 178 deletions(-) create mode 100644 .github/workflows/docs-gh-pages.yml rename .github/workflows/{python-package.yml => pytest.yml} (97%) diff --git a/.github/workflows/docs-gh-pages.yml b/.github/workflows/docs-gh-pages.yml new file mode 100644 index 00000000..702e53b4 --- /dev/null +++ b/.github/workflows/docs-gh-pages.yml @@ -0,0 +1,53 @@ +name: documentation + +on: + workflow_dispatch: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
+concurrency: + group: "pages" + cancel-in-progress: false + + +jobs: + build-docs: + runs-on: [ubuntu-latest] + container: python:3.10-buster + steps: + - uses: actions/checkout@v3 + - name: Install dependencies + run: | + apt-get update + apt-get -y install xvfb pandoc + python -m pip install --upgrade pip setuptools setuptools_scm wheel + pip install -e .[docs] + pip install -r docs/requirements.txt + - name: Build documentation + run: | + sphinx-build -b html docs/ _site + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + + # Deployment job + deploy-docs: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + if: github.ref_name == 'main' + needs: build-docs + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/python-package.yml b/.github/workflows/pytest.yml similarity index 97% rename from .github/workflows/python-package.yml rename to .github/workflows/pytest.yml index ff00391a..b2abebb3 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/pytest.yml @@ -1,11 +1,11 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Python package +name: Unit tests on: push: - branches: '**' + branches: [ main ] pull_request: branches: [ main ] diff --git a/src/torchphysics/models/activation_fn.py b/src/torchphysics/models/activation_fn.py index af08f7ef..a266dd96 100644 --- a/src/torchphysics/models/activation_fn.py +++ b/src/torchphysics/models/activation_fn.py @@ -3,9 +3,9 @@ class AdaptiveActivationFunction(nn.Module): - """Implementation of the adaptive activation functions used in [1]. - Will create activations of the form: activation_fn(scaling*a * x), - where activation_fn is an arbitrary function, a is the additional + """Implementation of the adaptive activation functions used in [#]_. + Will create activations of the form: activation_fn(scaling*a * x), + where activation_fn is an arbitrary function, a is the additional hyperparameter and scaling is an additional scaling factor. Parameters @@ -21,8 +21,8 @@ class AdaptiveActivationFunction(nn.Module): Notes ----- - .. [1] Ameya D. Jagtap, Kenji Kawaguchi and George Em Karniadakis, - "Adaptive activation functions accelerate convergence in deep and + .. [#] Ameya D. Jagtap, Kenji Kawaguchi and George Em Karniadakis, + "Adaptive activation functions accelerate convergence in deep and physics-informed neural networks", 2020 """ def __init__(self, activation_fn, inital_a=1.0, scaling=1.0): @@ -55,14 +55,14 @@ def backward(ctx, grad_output): class ReLUn(nn.Module): - """Implementation of a smoother version of ReLU, in the + """Implementation of a smoother version of ReLU, in the form of relu(x)**n. Parameters ---------- n : float The power to which the inputs should be rasied before appplying the - rectified linear unit function. + rectified linear unit function. 
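An illustrative usage sketch for the adaptive activation wrapper changed above (not part of the patch; it only relies on the constructor and the formula activation_fn(scaling*a * x) quoted in the docstring):

    import torch
    import torch.nn as nn
    from torchphysics.models.activation_fn import AdaptiveActivationFunction

    # Learnable slope: the module evaluates tanh(scaling * a * x), where `a` is a
    # trainable parameter initialised to `inital_a` and trained with the model.
    adaptive_tanh = AdaptiveActivationFunction(nn.Tanh(), inital_a=1.0, scaling=10.0)

    x = torch.linspace(-1.0, 1.0, steps=5)
    y = adaptive_tanh(x)  # element-wise, same shape as x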
""" def __init__(self, n): super().__init__() @@ -80,4 +80,4 @@ def __init__(self): super().__init__() def forward(self, input): - return torch.sin(input) \ No newline at end of file + return torch.sin(input) diff --git a/src/torchphysics/models/deeponet/branchnets.py b/src/torchphysics/models/deeponet/branchnets.py index 05d34d0a..2f7b741c 100644 --- a/src/torchphysics/models/deeponet/branchnets.py +++ b/src/torchphysics/models/deeponet/branchnets.py @@ -11,15 +11,17 @@ class BranchNet(Model): """A neural network that can be used inside a DeepONet-model. + Parameters ---------- function_space : Space The space of functions that can be put in this network. discretization_sampler : torchphysics.sampler - A sampler that will create the points at which the input functions should + A sampler that will create the points at which the input functions should evaluated, to create a discrete input for the network. The number of input neurons will be equal to the number of sampled points. Therefore, the sampler should always return the same number of points! + """ def __init__(self, function_space, discretization_sampler): super().__init__(function_space, output_space=None) @@ -29,39 +31,39 @@ def __init__(self, function_space, discretization_sampler): self.current_out = torch.empty(0) def finalize(self, output_space, output_neurons): - """Method to set the output space and output neurons of the network. + """Method to set the output space and output neurons of the network. Will be called once the BranchNet is connected to the TrunkNet, so that both will have a fitting output shape. output_space : Space The space in which the final output of the DeepONet will belong to. output_neurons : int - The number of output neurons. Will be multiplied my the dimension of the - output space, so each dimension will have the same number of + The number of output neurons. Will be multiplied my the dimension of the + output space, so each dimension will have the same number of intermediate neurons. """ self.output_neurons = output_neurons self.output_space = output_space def _reshape_multidimensional_output(self, output): - return output.reshape(-1, self.output_space.dim, + return output.reshape(-1, self.output_space.dim, int(self.output_neurons/self.output_space.dim)) - + @abc.abstractmethod def forward(self, discrete_function_batch, device='cpu'): """Evaluated the network at a given function batch. Should not be called directly, rather use the method ``.fix_input``. - + Parameters ---------- discrete_function_batch : tp.space.Points The points object of discrete function values to evaluate the model. device : str, optional The device where the data lays. Default is 'cpu'. - + Notes ----- - Will, in general, not return anything. The output of the network will be saved + Will, in general, not return anything. The output of the network will be saved internally to be used multiple times. """ raise NotImplementedError @@ -75,20 +77,20 @@ def _discretize_function_set(self, function_set, device='cpu'): return fn_out def fix_input(self, function, device='cpu'): - """Fixes the branch net for a given function. The branch net will - be evaluated for the given function and the output saved in ``current_out``. - + """Fixes the branch net for a given function. The branch net will + be evaluated for the given function and the output saved in ``current_out``. 
+ Parameters ---------- - function : callable, torchphysics.domains.FunctionSet, torch.Tensor, + function : callable, torchphysics.domains.FunctionSet, torch.Tensor, torchphysics.spaces.Points The function(s) for which the network should be evaluaded. device : str, optional The device where the data lays. Default is 'cpu'. - + Notes ----- - To overwrite the data ``current_out`` (the fixed function) just call + To overwrite the data ``current_out`` (the fixed function) just call ``.fix_input`` again with a new function. """ if isinstance(function, FunctionSet): @@ -120,13 +122,13 @@ def fix_input(self, function, device='cpu'): class FCBranchNet(BranchNet): """A neural network that can be used inside a DeepONet-model. - + Parameters ---------- function_space : Space The space of functions that can be put in this network. discretization_sampler : torchphysics.sampler - A sampler that will create the points at which the input functions should + A sampler that will create the points at which the input functions should evaluated, to create a discrete input for the network. The number of input neurons will be equal to the number of sampled points. Therefore, the sampler should always return the same number of points! @@ -136,13 +138,13 @@ class FCBranchNet(BranchNet): of hidden layers, while the i-th entry will determine the number of neurons of each layer. activations : torch.nn or list, optional - The activation functions of this network. + The activation functions of this network. Deafult is nn.Tanh(). xavier_gains : float or list, optional For the weight initialization a Xavier/Glorot algorithm will be used. - Default is 5/3. + Default is 5/3. """ - def __init__(self, function_space, discretization_sampler, hidden=(20,20,20), + def __init__(self, function_space, discretization_sampler, hidden=(20,20,20), activations=nn.Tanh(), xavier_gains=5/3): super().__init__(function_space, discretization_sampler) self.hidden = hidden @@ -151,8 +153,8 @@ def __init__(self, function_space, discretization_sampler, hidden=(20,20,20), def finalize(self, output_space, output_neurons): super().finalize(output_space, output_neurons) - layers = _construct_FC_layers(hidden=self.hidden, input_dim=self.input_dim, - output_dim=self.output_neurons, activations=self.activations, + layers = _construct_FC_layers(hidden=self.hidden, input_dim=self.input_dim, + output_dim=self.output_neurons, activations=self.activations, xavier_gains=self.xavier_gains) self.sequential = nn.Sequential(*layers) @@ -160,7 +162,7 @@ def finalize(self, output_space, output_neurons): def forward(self, discrete_function_batch): discrete_function_batch = discrete_function_batch.as_tensor.reshape(-1, self.input_dim) self.current_out = self._reshape_multidimensional_output(self.sequential(discrete_function_batch)) - + class ConvBranchNet1D(BranchNet): """A branch network that first applies a convolution to the input functions @@ -171,20 +173,20 @@ class ConvBranchNet1D(BranchNet): function_space : Space The space of functions that can be put in this network. discretization_sampler : torchphysics.sampler - A sampler that will create the points at which the input functions should + A sampler that will create the points at which the input functions should evaluated, to create a discrete input for the network. The number of input neurons will be equal to the number of sampled points. Therefore, the sampler should always return the same number of points! 
convolutional_network : torch.nn.module - The user defined convolutional network, that should be applied to the - branch input. Inside this network, the input can be transformed arbitrary, - e.g. you can also apply pooling or other layers. + The user defined convolutional network, that should be applied to the + branch input. Inside this network, the input can be transformed arbitrary, + e.g. you can also apply pooling or other layers. We only expect that the network gets the input in the shape: - [batch_dim, function_space.output_space.dim (channels_in), + [batch_dim, function_space.output_space.dim (channels_in), len(discretization_sampler)] - You have to make sure, that the number of output dimension is + You have to make sure, that the number of output dimension is compatible with the following linear layers. hidden : list or tuple The number and size of the hidden layers of the neural network. @@ -192,11 +194,11 @@ class ConvBranchNet1D(BranchNet): of hidden layers, while the i-th entry will determine the number of neurons of each layer. activations : torch.nn or list, optional - The activation functions of this network. + The activation functions of this network. Deafult is nn.Tanh(). xavier_gains : float or list, optional For the weight initialization a Xavier/Glorot algorithm will be used. - Default is 5/3. + Default is 5/3. """ def __init__(self, function_space, discretization_sampler, convolutional_network, hidden=(20,20,20), activations=nn.Tanh(), xavier_gains=5/3): @@ -208,8 +210,8 @@ def __init__(self, function_space, discretization_sampler, convolutional_network def finalize(self, output_space, output_neurons): super().finalize(output_space, output_neurons) - layers = _construct_FC_layers(hidden=self.hidden, input_dim=self.input_dim, - output_dim=self.output_neurons, activations=self.activations, + layers = _construct_FC_layers(hidden=self.hidden, input_dim=self.input_dim, + output_dim=self.output_neurons, activations=self.activations, xavier_gains=self.xavier_gains) self.sequential = nn.Sequential(*layers) @@ -224,14 +226,14 @@ def _discretize_fn(self, function, device): return discrete_fn.unsqueeze(-1) def forward(self, discrete_function_batch): - # for convolution we have to change the dimension order of + # for convolution we have to change the dimension order of # the input. # Pytorch conv1D needs: (batch, channels_in, length) # Generally we have : (batch, length, channels_in), where channels_in - # corresponds to the output dimension of our functions and length to the + # corresponds to the output dimension of our functions and length to the # number of discretization points. -> switch dim. 1 and 2 discrete_function_batch = discrete_function_batch.as_tensor x = self.conv_net(discrete_function_batch.permute(0, 2, 1)) # for the linear layer transform again and remove the last dimension: out = self.sequential(x.permute(0, 2, 1).reshape(-1, self.input_dim)) - self.current_out = self._reshape_multidimensional_output(out) \ No newline at end of file + self.current_out = self._reshape_multidimensional_output(out) diff --git a/src/torchphysics/models/deeponet/deeponet.py b/src/torchphysics/models/deeponet/deeponet.py index e10f37dc..3d7c09b1 100644 --- a/src/torchphysics/models/deeponet/deeponet.py +++ b/src/torchphysics/models/deeponet/deeponet.py @@ -7,39 +7,39 @@ class DeepONet(Model): - """Implementation of the architecture used in the DeepONet paper [1]. + """Implementation of the architecture used in the DeepONet paper [#]_. Consists of two single neural networks. 
One for the inputs of the function space (branch net) and one for the inputs of the variables (trunk net). Parameters ---------- trunk_net : torchphysics.models.TrunkNet - The neural network that will get the space/time/... variables as an - input. + The neural network that will get the space/time/... variables as an + input. branch_net : torchphysics.models.BranchNet - The neural network that will get the function variables as an - input. + The neural network that will get the function variables as an + input. output_space : Space The space in which the final output of the DeepONet will belong to. output_neurons : int - The number of output neurons, that will be the output of the + The number of output neurons, that will be the output of the TrunkNet and BranchNet. The corresponding outputs of both networks - are then connected with the inner product. - For higher dimensional outputs, will be multiplied my the dimension of - the output space, so each dimension will have the same number of + are then connected with the inner product. + For higher dimensional outputs, will be multiplied my the dimension of + the output space, so each dimension will have the same number of intermediate neurons. Notes ----- The number of output neurons in the branch and trunk net have to be the same! - .. [1] Lu Lu and Pengzhan Jin and Guofei Pang and Zhongqiang Zhang - and George Em Karniadakis, "Learning nonlinear operators via DeepONet + .. [#] Lu Lu and Pengzhan Jin and Guofei Pang and Zhongqiang Zhang + and George Em Karniadakis, "Learning nonlinear operators via DeepONet based on the universal approximation theorem of operators", 2021 """ def __init__(self, trunk_net, branch_net, output_space, output_neurons): self._check_trunk_and_branch_correct(trunk_net, branch_net) - super().__init__(input_space=trunk_net.input_space, + super().__init__(input_space=trunk_net.input_space, output_space=output_space) self.trunk = trunk_net self.branch = branch_net @@ -70,19 +70,19 @@ def forward(self, trunk_inputs, branch_inputs=None, device='cpu'): trunk_inputs : torchphysics.spaces.Points The inputs for the trunk net. branch_inputs : callable, torchphysics.domains.FunctionSet, optional - The function(s) for which the branch should be evaluaded. If no + The function(s) for which the branch should be evaluaded. If no input is given, the branch net has to be fixed before hand! device : str, optional The device where the data lays. Default is 'cpu'. - + Returns ------- torchphysics.spaces.Points A point object containing the output. - + """ if not branch_inputs is None: - self.fix_branch_input(branch_inputs, device=device) + self.fix_branch_input(branch_inputs, device=device) trunk_out = self.trunk(trunk_inputs) if len(trunk_out.shape) < 4: trunk_out = trunk_out.unsqueeze(0) # shape = [1, trunk_n, dim, neurons] @@ -99,8 +99,8 @@ def _forward_branch(self, function_set, iteration_num=-1, device='cpu'): self.branch(discrete_fn_batch) def fix_branch_input(self, function, device='cpu'): - """Fixes the branch net for a given function. this function will then be used - in every following forward call. To set a new function just call this method + """Fixes the branch net for a given function. this function will then be used + in every following forward call. To set a new function just call this method again. Parameters @@ -110,4 +110,4 @@ def fix_branch_input(self, function, device='cpu'): device : str, optional The device where the data lays. Default is 'cpu'. 
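An illustrative sketch of how the pieces above fit together (not part of the patch; `trunk`, `branch`, `U` and `f` are placeholders for a TrunkNet, a BranchNet, the output space and an input function built as described in this file):

    from torchphysics.models.deeponet.deeponet import DeepONet

    model = DeepONet(trunk_net=trunk, branch_net=branch,
                     output_space=U, output_neurons=40)

    # Option 1: fix the branch input once, then evaluate many trunk points.
    model.fix_branch_input(f)
    out = model(trunk_points)                   # Points object in the output space U

    # Option 2: hand the branch input to the forward call directly.
    out = model(trunk_points, branch_inputs=f)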
""" - self.branch.fix_input(function, device=device) \ No newline at end of file + self.branch.fix_input(function, device=device) diff --git a/src/torchphysics/models/deeponet/layers.py b/src/torchphysics/models/deeponet/layers.py index b1889a58..362934c5 100644 --- a/src/torchphysics/models/deeponet/layers.py +++ b/src/torchphysics/models/deeponet/layers.py @@ -37,6 +37,7 @@ class TrunkLinear(torch.nn.Module): """Applies a linear transformation to the incoming data: :math:`y = xA^T + b`, similar to torch.nn.Linear, but assumes the input `x` to be identical along the first batch axis, since this is the case in our implementation of trunk nets. + Args: in_features: size of each input sample out_features: size of each output sample @@ -62,6 +63,7 @@ class TrunkLinear(torch.nn.Module): >>> output = m(input) >>> print(output.size()) torch.Size([128, 30]) + """ __constants__ = ['in_features', 'out_features'] @@ -94,4 +96,4 @@ def forward(self, input: torch.Tensor) -> torch.Tensor: def extra_repr(self) -> str: return 'in_features={}, out_features={}, bias={}'.format( self.in_features, self.out_features, self.bias is not None - ) \ No newline at end of file + ) diff --git a/src/torchphysics/models/deeponet/trunknets.py b/src/torchphysics/models/deeponet/trunknets.py index 3b77d611..67858741 100644 --- a/src/torchphysics/models/deeponet/trunknets.py +++ b/src/torchphysics/models/deeponet/trunknets.py @@ -14,13 +14,14 @@ class TrunkNet(Model): input_space : Space The space of the points that can be put into this model. trunk_input_copied : bool, optional - If every sample function of the branch input gets evaluated at the same trunk input, + If every sample function of the branch input gets evaluated at the same trunk input, the evaluation process can be speed up, since the trunk only has to evaluated once - for the whole data batch of branch inputs. + for the whole data batch of branch inputs. If this is the case, set trunk_input_copied = True. If for example a dataset with different trunk inputs for each branch function - is used, set trunk_input_copied = False. Else this may lead to unexpected + is used, set trunk_input_copied = False. Else this may lead to unexpected behavior. + """ def __init__(self, input_space, trunk_input_copied=True): super().__init__(input_space, output_space=None) @@ -28,25 +29,28 @@ def __init__(self, input_space, trunk_input_copied=True): self.trunk_input_copied = trunk_input_copied def finalize(self, output_space, output_neurons): - """Method to set the output space and output neurons of the network. + """Method to set the output space and output neurons of the network. Will be called once the BranchNet is connected to the TrunkNet, so that both will have a fitting output shape. + Parameters + ---------- output_space : Space The space in which the final output of the DeepONet will belong to. output_neurons : int - The number of output neurons. Will be multiplied my the dimension of the - output space, so each dimension will have the same number of + The number of output neurons. Will be multiplied my the dimension of the + output space, so each dimension will have the same number of intermediate neurons. 
+ """ self.output_neurons = output_neurons self.output_space = output_space def _reshape_multidimensional_output(self, output): if len(output.shape) == 3: - return output.reshape(output.shape[0], output.shape[1], self.output_space.dim, + return output.reshape(output.shape[0], output.shape[1], self.output_space.dim, int(self.output_neurons/self.output_space.dim)) - return output.reshape(-1, self.output_space.dim, + return output.reshape(-1, self.output_space.dim, int(self.output_neurons/self.output_space.dim)) @@ -84,13 +88,13 @@ class FCTrunkNet(TrunkNet): of hidden layers, while the i-th entry will determine the number of neurons of each layer. activations : torch.nn or list, optional - The activation functions of this network. + The activation functions of this network. Deafult is nn.Tanh(). xavier_gains : float or list, optional For the weight initialization a Xavier/Glorot algorithm will be used. - Default is 5/3. + Default is 5/3. """ - def __init__(self, input_space, hidden=(20,20,20), activations=nn.Tanh(), xavier_gains=5/3, + def __init__(self, input_space, hidden=(20,20,20), activations=nn.Tanh(), xavier_gains=5/3, trunk_input_copied=True): super().__init__(input_space, trunk_input_copied=trunk_input_copied) self.hidden = hidden @@ -102,16 +106,16 @@ def finalize(self, output_space, output_neurons): super().finalize(output_space, output_neurons) # special layer architecture is used if trunk data is copied -> faster training if self.trunk_input_copied: - layers = construct_FC_trunk_layers(hidden=self.hidden, input_dim=self.input_space.dim, - output_dim=self.output_neurons, activations=self.activations, + layers = construct_FC_trunk_layers(hidden=self.hidden, input_dim=self.input_space.dim, + output_dim=self.output_neurons, activations=self.activations, xavier_gains=self.xavier_gains) else: - layers = _construct_FC_layers(hidden=self.hidden, input_dim=self.input_space.dim, - output_dim=self.output_neurons, activations=self.activations, + layers = _construct_FC_layers(hidden=self.hidden, input_dim=self.input_space.dim, + output_dim=self.output_neurons, activations=self.activations, xavier_gains=self.xavier_gains) self.sequential = nn.Sequential(*layers) def forward(self, points): points = self._fix_points_order(points) - return self._reshape_multidimensional_output(self.sequential(points.as_tensor)) \ No newline at end of file + return self._reshape_multidimensional_output(self.sequential(points.as_tensor)) diff --git a/src/torchphysics/models/deepritz.py b/src/torchphysics/models/deepritz.py index 6a31539a..b2acddf9 100644 --- a/src/torchphysics/models/deepritz.py +++ b/src/torchphysics/models/deepritz.py @@ -5,7 +5,7 @@ class DeepRitzNet(Model): """ - Implementation of the architecture used in the Deep Ritz paper [1]. + Implementation of the architecture used in the Deep Ritz paper [1]_. Consists of fully connected layers and residual connections. Parameters @@ -21,7 +21,7 @@ class DeepRitzNet(Model): Notes ----- - .. [1] Weinan E and Bing Yu, "The Deep Ritz method: A deep learning-based numerical + .. 
[#] Weinan E and Bing Yu, "The Deep Ritz method: A deep learning-based numerical algorithm for solving variational problems", 2017 """ def __init__(self, input_space, output_space, width, depth): @@ -44,5 +44,5 @@ def forward(self, x): x_temp = torch.relu(layer1(x)**3) x_temp = torch.relu(layer2(x_temp)**3) x = x_temp + x - - return Points(self.linearOut(x), self.output_space) \ No newline at end of file + + return Points(self.linearOut(x), self.output_space) diff --git a/src/torchphysics/models/fcn.py b/src/torchphysics/models/fcn.py index c50842e2..c9f2f207 100644 --- a/src/torchphysics/models/fcn.py +++ b/src/torchphysics/models/fcn.py @@ -50,7 +50,7 @@ class FCN(Model): xavier_gains : float or list, optional For the weight initialization a Xavier/Glorot algorithm will be used. The gain can be specified over this value. - Default is 5/3. + Default is 5/3. """ def __init__(self, input_space, @@ -60,8 +60,8 @@ def __init__(self, xavier_gains=5/3): super().__init__(input_space, output_space) - layers = _construct_FC_layers(hidden=hidden, input_dim=self.input_space.dim, - output_dim=self.output_space.dim, + layers = _construct_FC_layers(hidden=hidden, input_dim=self.input_space.dim, + output_dim=self.output_space.dim, activations=activations, xavier_gains=xavier_gains) self.sequential = nn.Sequential(*layers) @@ -72,10 +72,10 @@ def forward(self, points): class Harmonic_FCN(Model): - """A fully connected neural network, that for the input :math:`x` will also - compute (and use) the values + """A fully connected neural network, that for the input :math:`x` will also + compute (and use) the values :math:`(\cos(\pi x), \sin(\pi x), ..., \cos(n \pi x), \sin(n \pi x))`. - as an input. See for example [1], for some theoretical background, on why this may be + as an input. See for example [#]_, for some theoretical background, on why this may be advantageous. Should be used in sequence with a normalization layer, to get inputs in the range of [-1, 1] with the cos/sin functions. @@ -96,7 +96,7 @@ class Harmonic_FCN(Model): The highest frequenz that should be used in the input computation. Equal to :math:`n` in the above describtion. min_frequenz : int - The smallest frequenz that should be used. Usefull, if it is expected, that + The smallest frequenz that should be used. Usefull, if it is expected, that only higher frequenzies appear in the solution. Default is 0. activations : torch.nn or list, optional @@ -107,24 +107,24 @@ class Harmonic_FCN(Model): xavier_gains : float or list, optional For the weight initialization a Xavier/Glorot algorithm will be used. The gain can be specified over this value. - Default is 5/3. + Default is 5/3. Notes ----- - .. [1] Tancik, Matthew and Srinivasan, Pratul P. and Mildenhall, Ben et al., - "Fourier Features Let Networks Learn High Frequency Functions in Low Dimensional + .. [#] Tancik, Matthew and Srinivasan, Pratul P. 
and Mildenhall, Ben et al., + "Fourier Features Let Networks Learn High Frequency Functions in Low Dimensional Domains", 2020 """ - def __init__(self, input_space, output_space, max_frequenz : int, + def __init__(self, input_space, output_space, max_frequenz : int, hidden=(20,20,20), min_frequenz : int = 0, activations=nn.Tanh(), xavier_gains=5/3): assert max_frequenz > min_frequenz, "used max frequenz has to be > min frequenz" super().__init__(input_space, output_space) self.max_frequenz = max_frequenz self.min_frequenz = min_frequenz - layers = _construct_FC_layers(hidden=hidden, - input_dim=(2*(max_frequenz-min_frequenz)+1) * self.input_space.dim, - output_dim=self.output_space.dim, + layers = _construct_FC_layers(hidden=hidden, + input_dim=(2*(max_frequenz-min_frequenz)+1) * self.input_space.dim, + output_dim=self.output_space.dim, activations=activations, xavier_gains=xavier_gains) self.sequential = nn.Sequential(*layers) @@ -136,4 +136,4 @@ def forward(self, points): points_list.append(torch.cos((i+1) * math.pi * points)) points_list.append(torch.sin((i+1) * math.pi * points)) points = torch.cat(points_list, dim=-1) - return Points(self.sequential(points), self.output_space) \ No newline at end of file + return Points(self.sequential(points), self.output_space) diff --git a/src/torchphysics/models/model.py b/src/torchphysics/models/model.py index 8f95a1fc..3274a8e1 100644 --- a/src/torchphysics/models/model.py +++ b/src/torchphysics/models/model.py @@ -18,7 +18,7 @@ def __init__(self, input_space, output_space): super().__init__() self.input_space = input_space self.output_space = output_space - + def _fix_points_order(self, points): if points.space != self.input_space: if points.space.keys() != self.input_space.keys(): @@ -53,7 +53,7 @@ def __init__(self, domain): for i in range(domain.dim): diag.append(maxs[i] - mins[i]) bias.append((maxs[i] + mins[i])/2) - + diag = 2./torch.tensor(diag) bias = -torch.tensor(bias)*diag with torch.no_grad(): @@ -73,7 +73,7 @@ class Parallel(Model): *models : The models that should be evaluated parallel. The evaluation happens in the order that the models are passed in. - The outputs of the models will be concatenated. + The outputs of the models will be concatenated. The models are not allowed to have the same output spaces, but can have the same input spaces. """ @@ -86,7 +86,7 @@ def __init__(self, *models): output_space = output_space * model.output_space super().__init__(input_space, output_space) self.models = nn.ModuleList(models) - + def forward(self, points): out = [] for model in self.models: @@ -98,7 +98,7 @@ class Sequential(Model): Parameters ---------- - *models : + *models : The models that should be evaluated sequentially. The evaluation happens in the order that the models are passed in. To work correcty the output of the i-th model has to fit the input @@ -107,7 +107,7 @@ class Sequential(Model): def __init__(self, *models): super().__init__(models[0].input_space, models[-1].output_space) self.models = nn.ModuleList(models) - + def forward(self, points): points = self._fix_points_order(points) for model in self.models: @@ -118,7 +118,7 @@ def forward(self, points): class AdaptiveWeightLayer(nn.Module): """ Adds adaptive weights to the non-reduced loss. The weights are maximized by - reversing the gradients, similar to the idea in [1]. + reversing the gradients, similar to the idea in [#]_. Should currently only be used with fixed points. Parameters @@ -128,7 +128,7 @@ class AdaptiveWeightLayer(nn.Module): Notes ----- - .. [1] L. 
McClenny, "Self-Adaptive Physics-Informed Neural Networks using a Soft + .. [#] L. McClenny, "Self-Adaptive Physics-Informed Neural Networks using a Soft Attention Mechanism", 2020. """ class GradReverse(torch.autograd.Function): diff --git a/src/torchphysics/models/qres.py b/src/torchphysics/models/qres.py index 08cfc303..7c01faf2 100644 --- a/src/torchphysics/models/qres.py +++ b/src/torchphysics/models/qres.py @@ -12,28 +12,28 @@ class Quadratic(nn.Module): Parameters ---------- - in_features : int + in_features : int size of each input sample. out_features : size of each output sample. xavier_gains : float or list For the weight initialization a Xavier/Glorot algorithm will be used. The gain can be specified over this value. - Default is 5/3. + Default is 5/3. """ def __init__(self, in_features, out_features, xavier_gains): super().__init__() - bias = torch.nn.init.xavier_normal_(torch.zeros(1, out_features), - gain=xavier_gains) + bias = torch.nn.init.xavier_normal_(torch.zeros(1, out_features), + gain=xavier_gains) self.bias = torch.nn.Parameter(bias) - self.linear_weights = torch.nn.Linear(in_features=in_features, - out_features=out_features, + self.linear_weights = torch.nn.Linear(in_features=in_features, + out_features=out_features, bias=False) - torch.nn.init.xavier_normal_(self.linear_weights.weight, gain=xavier_gains) - self.quadratic_weights = torch.nn.Linear(in_features=in_features, + torch.nn.init.xavier_normal_(self.linear_weights.weight, gain=xavier_gains) + self.quadratic_weights = torch.nn.Linear(in_features=in_features, out_features=out_features, bias=False) - torch.nn.init.xavier_normal_(self.quadratic_weights.weight, gain=xavier_gains) + torch.nn.init.xavier_normal_(self.quadratic_weights.weight, gain=xavier_gains) def forward(self, points): linear_out = self.linear_weights(points) @@ -50,9 +50,9 @@ def out_features(self): class QRES(Model): - """Implements the quadratic residual networks from [1]. + """Implements the quadratic residual networks from [#]_. Instead of a linear layer, a quadratic layer W_1*x (*) W_2*x + W_1*x + b - will be used. Here (*) means the hadamard product of two vectors + will be used. Here (*) means the hadamard product of two vectors (elementwise multiplication). Parameters @@ -75,12 +75,12 @@ class QRES(Model): xavier_gains : float or list, optional For the weight initialization a Xavier/Glorot algorithm will be used. The gain can be specified over this value. - Default is 5/3. + Default is 5/3. Notes ----- - .. [1] Jie Bu and Anuj Karpatne, "Quadratic Residual Networks: - A New Class of Neural Networks for Solving Forward and Inverse Problems + .. [#] Jie Bu and Anuj Karpatne, "Quadratic Residual Networks: + A New Class of Neural Networks for Solving Forward and Inverse Problems in Physics Involving PDEs", 2021 """ def __init__(self, @@ -107,4 +107,4 @@ def __init__(self, self.sequential = nn.Sequential(*layers) def forward(self, points): - return Points(self.sequential(points), self.output_space) \ No newline at end of file + return Points(self.sequential(points), self.output_space) diff --git a/src/torchphysics/problem/conditions/condition.py b/src/torchphysics/problem/conditions/condition.py index 13ad14cb..02d06e20 100644 --- a/src/torchphysics/problem/conditions/condition.py +++ b/src/torchphysics/problem/conditions/condition.py @@ -274,7 +274,7 @@ def _move_static_data(self, device): class MeanCondition(SingleModuleCondition): """ A condition that minimizes the mean of the residual of a single module, can be - used e.g. 
in Deep Ritz Method [1] or for energy functionals, since the mean can + used e.g. in Deep Ritz Method [#]_ or for energy functionals, since the mean can be seen as a (scaled) integral approximation. Parameters @@ -306,7 +306,7 @@ class MeanCondition(SingleModuleCondition): Notes ----- - .. [1] Weinan E and Bing Yu, "The Deep Ritz method: A deep learning-based numerical + .. [#] Weinan E and Bing Yu, "The Deep Ritz method: A deep learning-based numerical algorithm for solving variational problems", 2017 """ @@ -349,7 +349,7 @@ class DeepRitzCondition(MeanCondition): Notes ----- - .. [1] Weinan E and Bing Yu, "The Deep Ritz method: A deep learning-based numerical + .. [#] Weinan E and Bing Yu, "The Deep Ritz method: A deep learning-based numerical algorithm for solving variational problems", 2017 """ def __init__(self, module, sampler, integrand_fn, track_gradients=True, data_functions={}, @@ -362,7 +362,7 @@ def __init__(self, module, sampler, integrand_fn, track_gradients=True, data_fun class PINNCondition(SingleModuleCondition): """ A condition that minimizes the mean squared error of the given residual, as required in - the framework of physics-informed neural networks [1]. + the framework of physics-informed neural networks [#]_. Parameters ------- @@ -393,7 +393,7 @@ class PINNCondition(SingleModuleCondition): Notes ----- - .. [1] M. Raissi, "Physics-informed neural networks: A deep learning framework for + .. [#] M. Raissi, "Physics-informed neural networks: A deep learning framework for solving forward and inverse problems involving nonlinear partial differential equations", Journal of Computational Physics, vol. 378, pp. 686-707, 2019. """ @@ -490,7 +490,7 @@ def forward(self, device='cpu', iteration=None): self.last_unreduced_loss = None else: x_b = self.non_periodic_sampler.sample_points(device=device) - + x_left = self.left_sampler.sample_points(device=device) x_right = self.right_sampler.sample_points(device=device) @@ -664,7 +664,7 @@ def _move_static_data(self, device): class AdaptiveWeightsCondition(SingleModuleCondition): """ - A condition using an AdaptiveWeightLayer [1] to assign adaptive weights to all points + A condition using an AdaptiveWeightLayer [#]_ to assign adaptive weights to all points during training. Parameters @@ -700,7 +700,7 @@ class AdaptiveWeightsCondition(SingleModuleCondition): Notes ----- - .. [1] Levi D. McClenny, "Self-Adaptive Physics-Informed Neural Networks using a + .. [#]_ Levi D. 
McClenny, "Self-Adaptive Physics-Informed Neural Networks using a Soft Attention Mechanism", CoRR, 2020 """ diff --git a/src/torchphysics/problem/conditions/deeponet_condition.py b/src/torchphysics/problem/conditions/deeponet_condition.py index 9cd0d630..64a2654f 100644 --- a/src/torchphysics/problem/conditions/deeponet_condition.py +++ b/src/torchphysics/problem/conditions/deeponet_condition.py @@ -7,7 +7,7 @@ class DeepONetSingleModuleCondition(Condition): - def __init__(self, deeponet_model, function_set, input_sampler, residual_fn, + def __init__(self, deeponet_model, function_set, input_sampler, residual_fn, error_fn, reduce_fn=torch.mean, name='singlemodulecondition', track_gradients=True, data_functions={}, parameter=Parameter.empty(), weight=1.0): @@ -56,7 +56,7 @@ def forward(self, device='cpu', iteration=None): function_set_output = {} if self.eval_function_set: function_set_output = self.function_set.create_function_batch(x[0,:,:]).coordinates - + unreduced_loss = self.error_fn(self.residual_fn({**y.coordinates, **x_coordinates, **function_set_output, @@ -71,8 +71,8 @@ def forward(self, device='cpu', iteration=None): class PIDeepONetCondition(DeepONetSingleModuleCondition): """ - A condition that minimizes the mean squared error of the given residual, as - required in the framework of physics-informed DeepONets [1]. + A condition that minimizes the mean squared error of the given residual, as + required in the framework of physics-informed DeepONets [#]_. Parameters ------- @@ -105,17 +105,17 @@ class PIDeepONetCondition(DeepONetSingleModuleCondition): Notes ----- - .. [1] Wang, Sifan and Wang, Hanwen and Perdikaris, + .. [#] Wang, Sifan and Wang, Hanwen and Perdikaris, "Learning the solution operator of parametric partial - differential equations with physics-informed DeepOnets", + differential equations with physics-informed DeepOnets", https://arxiv.org/abs/2103.10974, 2021. """ - def __init__(self, deeponet_model, function_set, input_sampler, residual_fn, + def __init__(self, deeponet_model, function_set, input_sampler, residual_fn, name='pinncondition', track_gradients=True, data_functions={}, parameter=Parameter.empty(), weight=1.0): - super().__init__(deeponet_model, function_set, input_sampler, - residual_fn=residual_fn, error_fn=SquaredError(), - reduce_fn=torch.mean, name=name, + super().__init__(deeponet_model, function_set, input_sampler, + residual_fn=residual_fn, error_fn=SquaredError(), + reduce_fn=torch.mean, name=name, track_gradients=track_gradients, data_functions=data_functions, parameter=parameter, weight=weight) @@ -155,10 +155,10 @@ class DeepONetDataCondition(DataCondition): training. 
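An illustrative residual for the PINN setting described above (not part of the patch; `model` and `inner_sampler` are placeholders, and the argument order is assumed to follow the module/sampler/function pattern shown for DeepRitzCondition):

    from torchphysics.problem.conditions.condition import PINNCondition
    from torchphysics.utils.differentialoperators import grad, div

    # Residual of the Poisson problem -Laplace(u) = 1; the condition squares and
    # averages this residual over the sampled points.
    def poisson_residual(u, x):
        return div(grad(u, x), x) + 1.0

    pde_condition = PINNCondition(module=model, sampler=inner_sampler,
                                  residual_fn=poisson_residual)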
""" - def __init__(self, module, dataloader, norm, constrain_fn = None, + def __init__(self, module, dataloader, norm, constrain_fn = None, root=1., use_full_dataset=False, name='datacondition', weight=1.0): - super().__init__(module=module, dataloader=dataloader, - norm=norm, root=root, use_full_dataset=use_full_dataset, + super().__init__(module=module, dataloader=dataloader, + norm=norm, root=root, use_full_dataset=use_full_dataset, name=name, weight=weight, constrain_fn=constrain_fn) assert isinstance(self.module, DeepONet) @@ -172,4 +172,4 @@ def _compute_dist(self, batch, device): model_out = self.constrain_fn({**model_out.coordinates, **trunk_in.coordinates}) else: model_out = model_out.as_tensor - return torch.abs(model_out - out.as_tensor) \ No newline at end of file + return torch.abs(model_out - out.as_tensor) diff --git a/src/torchphysics/problem/samplers/random_samplers.py b/src/torchphysics/problem/samplers/random_samplers.py index bbec7972..935cb95d 100644 --- a/src/torchphysics/problem/samplers/random_samplers.py +++ b/src/torchphysics/problem/samplers/random_samplers.py @@ -21,7 +21,7 @@ class RandomUniformSampler(PointSampler): The desiered density of the created points. filter : callable, optional A function that restricts the possible positions of sample points. - A point that is allowed should return True, therefore a point that should be + A point that is allowed should return True, therefore a point that should be removed must return False. The filter has to be able to work with a batch of inputs. The Sampler will use a rejection sampling to find the right amount of points. @@ -33,7 +33,7 @@ def __init__(self, domain, n_points=None, density=None, filter_fn=None): def _sample_points(self, params=Points.empty(), device='cpu'): if self.n_points: rand_points = self.domain.sample_random_uniform(self.n_points, - params=params, + params=params, device=device) repeated_params = self._repeat_params(params, len(self)) return rand_points.join(repeated_params) @@ -48,7 +48,7 @@ def _sample_points_with_filter(self, params=Points.empty(), device='cpu'): if self.n_points: sample_points = self._sample_n_points_with_filter(params, device) else: - # for density sampling, just sample normally and afterwards remove all + # for density sampling, just sample normally and afterwards remove all # points that are not allowed sample_points = self._sample_points(params, device) sample_points = self._apply_filter(sample_points) @@ -77,7 +77,7 @@ def _sample_n_points_with_filter(self, params, device): # if to many points were sampled, delete them. cuted_points = self._cut_tensor_to_length_n(new_sample_points) sample_points = self._set_sampled_points(sample_points, cuted_points) - return sample_points + return sample_points class GaussianSampler(PointSampler): @@ -89,7 +89,7 @@ class GaussianSampler(PointSampler): domain : torchphysics.domain.Domain The domain in which the points should be sampled. n_points : int - The number of points that should be sampled. + The number of points that should be sampled. mean : list, array or tensor The center/mean of the distribution. Has to fit the dimension of the given domain. 
@@ -111,7 +111,7 @@ def _check_mean_correct_dim(self): elif not isinstance(self.mean, torch.Tensor): self.mean = torch.FloatTensor(self.mean) assert len(self.mean) == self.domain.dim, \ - f"""Dimension of mean: {self.mean}, does not fit the domain.""" + f"""Dimension of mean: {self.mean}, does not fit the domain.""" def _sample_points(self, params=Points.empty(), device='cpu'): self._set_device_of_mean_and_std(device) @@ -147,7 +147,7 @@ def _check_inside_domain(self, new_points): class LHSSampler(PointSampler): - """Will create a simple latin hypercube sampling [1] in the given domain. + """Will create a simple latin hypercube sampling [#]_ in the given domain. Only works for the inner part of a domain, not the boundary! Parameters @@ -155,14 +155,14 @@ class LHSSampler(PointSampler): domain : torchphysics.domain.Domain The domain in which the points should be sampled. n_points : int - The number of points that should be sampled. + The number of points that should be sampled. Notes ----- A bounding box is used tp create the lhs-points in the domain. - Points outside will be rejected and additional random uniform points will be + Points outside will be rejected and additional random uniform points will be added to get a total number of n_points. - .. [1] https://en.wikipedia.org/wiki/Latin_hypercube_sampling + .. [#] https://en.wikipedia.org/wiki/Latin_hypercube_sampling """ def __init__(self, domain, n_points): assert not isinstance(domain, BoundaryDomain), \ @@ -186,7 +186,7 @@ def _create_lhs_in_bounding_box(self, bounding_box, device): lhs_points = torch.zeros((self.n_points, self.domain.dim), device=device) # for each axis apply the lhs strategy for i in range(self.domain.dim): - axis_grid = torch.linspace(bounding_box[2*i], bounding_box[2*i+1], + axis_grid = torch.linspace(bounding_box[2*i], bounding_box[2*i+1], steps=self.n_points+1, device=device)[:-1] # dont need endpoint axis_length = bounding_box[2*i+1] - bounding_box[2*i] random_shift = axis_length/self.n_points * torch.rand(self.n_points, @@ -235,7 +235,7 @@ class AdaptiveThresholdRejectionSampler(AdaptiveSampler): density will change loccally during iterations. filter : callable, optional A function that restricts the possible positions of sample points. - A point that is allowed should return True, therefore a point that should be + A point that is allowed should return True, therefore a point that should be removed must return False. The filter has to be able to work with a batch of inputs. The Sampler will use a rejection sampling to find the right amount of points. @@ -250,7 +250,7 @@ def __init__(self, domain, resample_ratio, n_points=None, density=None, n_points=n_points, density=density, filter_fn=filter_fn) - + self.last_points = None def sample_points(self, unreduced_loss=None, params=Points.empty(), device='cpu'): @@ -282,7 +282,7 @@ class AdaptiveRandomRejectionSampler(AdaptiveSampler): density will change loccally during iterations. filter : callable, optional A function that restricts the possible positions of sample points. - A point that is allowed should return True, therefore a point that should be + A point that is allowed should return True, therefore a point that should be removed must return False. The filter has to be able to work with a batch of inputs. The Sampler will use a rejection sampling to find the right amount of points. 
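An illustrative use of the latin hypercube sampler above (not part of the patch; the tp.spaces/tp.domains/tp.samplers shortcuts are assumed to expose the classes from this file):

    import torchphysics as tp

    X = tp.spaces.R1('x')
    I = tp.domains.Interval(X, 0, 1)

    # 100 latin-hypercube points in the interior of the interval; for non-box
    # domains, points falling outside are rejected and refilled uniformly, as
    # the notes above describe.
    sampler = tp.samplers.LHSSampler(I, n_points=100)
    points = sampler.sample_points()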
@@ -296,7 +296,7 @@ def __init__(self, domain, n_points=None, density=None, n_points=n_points, density=density, filter_fn=filter_fn) - + self.last_points = None def sample_points(self, unreduced_loss=None, params=Points.empty(), device='cpu'): diff --git a/src/torchphysics/solver.py b/src/torchphysics/solver.py index 6ab24545..7cb8e681 100644 --- a/src/torchphysics/solver.py +++ b/src/torchphysics/solver.py @@ -46,8 +46,9 @@ def __init__(self, self.train_conditions = nn.ModuleList(train_conditions) self.val_conditions = nn.ModuleList(val_conditions) self.optimizer_setting = optimizer_setting - + def train_dataloader(self): + """""" # HACK: create an empty trivial dataloader, since real data is loaded # in conditions steps = self.trainer.max_steps @@ -57,14 +58,15 @@ def train_dataloader(self): "of 1000 steps.") steps = 1000 return torch.utils.data.DataLoader(torch.empty(steps)) - + def val_dataloader(self): + """""" # HACK: we perform only a single step during validation, return torch.utils.data.DataLoader(torch.empty(1)) def _set_lr_scheduler(self, optimizer): lr_scheduler = self.scheduler['class'](optimizer, **self.scheduler['args']) - lr_scheduler = {'scheduler': lr_scheduler, 'name': 'learning_rate', + lr_scheduler = {'scheduler': lr_scheduler, 'name': 'learning_rate', 'interval': 'epoch', 'frequency': 1} for input_name in self.scheduler: if not input_name in ['class', 'args']: @@ -107,7 +109,7 @@ def configure_optimizers(self): lr_scheduler = self.optimizer_setting.scheduler_class(optimizer, **self.optimizer_setting.scheduler_args ) - lr_scheduler = {'scheduler': lr_scheduler, 'name': 'learning_rate', + lr_scheduler = {'scheduler': lr_scheduler, 'name': 'learning_rate', 'interval': 'step', 'frequency': self.optimizer_setting.scheduler_frequency} for input_name in self.optimizer_setting.scheduler_args: diff --git a/src/torchphysics/utils/differentialoperators.py b/src/torchphysics/utils/differentialoperators.py index f4bd0088..3ac1879f 100644 --- a/src/torchphysics/utils/differentialoperators.py +++ b/src/torchphysics/utils/differentialoperators.py @@ -130,7 +130,7 @@ def normal_derivative(model_out, normals, *derivative_variable): gradient = grad(model_out, *derivative_variable) normal_derivatives = gradient*normals return normal_derivatives.sum(dim=-1, keepdim=True) - + def div(model_out, *derivative_variable): '''Computes the divergence of a network with respect to the given variable. @@ -141,7 +141,7 @@ def div(model_out, *derivative_variable): The output tensor of the neural network derivative_variable : torch.tensor The input tensor of the variables in which respect the derivatives have to - be computed. Have to be in a consistent ordering, if for example the output + be computed. Have to be in a consistent ordering, if for example the output is u = (u_x, u_y) than the variables has to passed in the order (x, y) Returns ---------- @@ -171,7 +171,7 @@ def div(model_out, *derivative_variable): The output tensor of the neural network derivative_variable : torch.tensor The input tensor of the variables in which respect the derivatives have to - be computed. Have to be in a consistent ordering, if for example the output + be computed. 
Have to be in a consistent ordering, if for example the output is u = (u_x, u_y) than the variables has to passed in the order (x, y) Returns @@ -278,9 +278,9 @@ def rot(model_out, *derivative_variable): """ jacobian = jac(model_out, *derivative_variable) rotation = torch.zeros((len(derivative_variable[0]), 3)) - rotation[:, 0] = jacobian[:, 2, 1] - jacobian[:, 1, 2] - rotation[:, 1] = jacobian[:, 0, 2] - jacobian[:, 2, 0] - rotation[:, 2] = jacobian[:, 1, 0] - jacobian[:, 0, 1] + rotation[:, 0] = jacobian[:, 2, 1] - jacobian[:, 1, 2] + rotation[:, 1] = jacobian[:, 0, 2] - jacobian[:, 2, 0] + rotation[:, 2] = jacobian[:, 1, 0] - jacobian[:, 0, 1] return rotation @@ -299,7 +299,7 @@ def partial(model_out, *derivative_variables): Returns ---------- torch.tensor - A Tensor, where every row contains the values of the computed partial + A Tensor, where every row contains the values of the computed partial derivative of the model w.r.t the row of the input variable. ''' du = model_out @@ -338,7 +338,7 @@ def convective(deriv_out, convective_field, *derivative_variable): def sym_grad(model_out, *derivative_variable): - """Computes the symmetric gradient: :math:`0.5(\nabla u + \nabla u^T)`. + """Computes the symmetric gradient: :math:`0.5(\\nabla u + \\nabla u^T)`. Parameters ---------- @@ -348,9 +348,9 @@ def sym_grad(model_out, *derivative_variable): The spatial variable in which respect model_out should be differentiated. Returns - ---------- + ------- torch.tensor - A Tensor of matrices of the form (batch, dim, dim), containing the + A Tensor of matrices of the form (batch, dim, dim), containing the symmetric gradient. """ jac_matrix = jac(model_out, *derivative_variable) @@ -370,13 +370,13 @@ def matrix_div(model_out, *derivative_variable): Returns ---------- torch.tensor - A Tensor of vectors of the form (batch, dim), containing the + A Tensor of vectors of the form (batch, dim), containing the divegrence of the input. """ - div_out = torch.zeros((len(model_out), model_out.shape[1]), + div_out = torch.zeros((len(model_out), model_out.shape[1]), device=model_out.device) for i in range(model_out.shape[1]): - # compute divergence of matrix by computing the divergence + # compute divergence of matrix by computing the divergence # for each row current_row = model_out.narrow(1, i, 1).squeeze(1) div_out[:, i:i+1] = div(current_row, *derivative_variable)
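An illustrative call of the differential operators touched above (not part of the patch; it only uses the signatures shown in this file):

    import torch
    from torchphysics.utils.differentialoperators import div, sym_grad

    # Vector field u(x) = (x0**2, sin(x1)) evaluated at a batch of 2D points.
    x = torch.rand(10, 2, requires_grad=True)
    u = torch.cat([x[:, :1]**2, torch.sin(x[:, 1:])], dim=1)

    d = div(u, x)       # divergence du0/dx0 + du1/dx1, one value per batch entry
    e = sym_grad(u, x)  # symmetric gradient 0.5*(J + J^T), shape (10, 2, 2)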