diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e02077a --- /dev/null +++ b/Makefile @@ -0,0 +1,8 @@ +test: + pytest + +coverage: + coverage run -m pytest && coverage report --show-missing + +lint: + black liltorch diff --git a/README.md b/README.md index 3fdfb3d..6a9ebb0 100644 --- a/README.md +++ b/README.md @@ -1 +1,12 @@ -Yes \ No newline at end of file +# LilTorch + +![Logo](documentation/docs/images/logo-torch.png) + +LilTorch is a lightweight library for Deep Learning, created entirely in Python with Numpy as its sole dependency. +This library allows you to design and understand the internals of Neural Networks without the need for C/C++ imported code and binaries. +Everything is as understandable as Python itself. + +[Documentation here](https://liltorchdocs.netlify.app/) + +**Note**: This library is intended for educational purposes only. It is not recommended for production use, and execution speed is not a primary focus. + diff --git a/documentation/docs/Documentation/API-Reference.md b/documentation/docs/Documentation/API-Reference.md index c945c13..f771016 100644 --- a/documentation/docs/Documentation/API-Reference.md +++ b/documentation/docs/Documentation/API-Reference.md @@ -1,7 +1,21 @@ + + +# \_\_init\_\_ + + + +# nn + # nn.loss +This module implements commonly used loss functions for neural networks. + +Loss functions measure the difference between the model's predictions and the ground truth labels. +Minimizing the loss function during training helps the model learn accurate representations. +This module provides functions for popular loss functions like mean squared error, cross-entropy, etc. + ## MeanSquaredError Objects @@ -57,3 +71,263 @@ Compute the gradient of the Mean Squared Error with respect to the predicted val - `np.ndarray` - The gradient of the loss with respect to y_pred. + + +# nn.layer + + + +## Layer Objects + +```python +class Layer() +``` + +This is a abstract class + + + +# nn.network + + + +## Network Objects + +```python +class Network() +``` + +A basic neural network class for building and training multi-layer networks. + +This class provides a framework for creating and using neural networks with customizable layers. +It supports adding different layer types (inherited from `liltorch.nn.layer.Layer`), performing +forward and backward passes for training, and updating layer weights using the provided learning rate. + + + +#### \_\_init\_\_ + +```python +def __init__(lr: float) -> None +``` + +Initializes a new neural network. + +**Arguments**: + +- `lr` - The learning rate used for updating the weights of the layers during training. (float) + + + +#### add + +```python +def add(layer: Layer) -> None +``` + +Adds a layer to the neural network. + +This method allows you to build your network by sequentially adding different layer types +(e.g., `Tanh`, `Linear`, etc.) inherited from the `Layer` class. + +**Arguments**: + +- `layer` - An instance of a layer class from `liltorch.nn.layer`. + + + +#### forward + +```python +def forward(x: np.ndarray) -> np.ndarray +``` + +Performs the forward pass through the network. + +This method propagates the input data (`x`) through all the layers in the network, +applying their respective forward passes sequentially. + +**Arguments**: + +- `x` - The input data for the network, typically a NumPy array. + + +**Returns**: + + The output of the network after passing through all the layers. 
(NumPy array) + + + +#### backward + +```python +def backward(error: np.ndarray) +``` + +Performs the backward pass for backpropagation. + +This method calculates the gradients for all layers in the network using backpropagation. +It iterates through the layers in reverse order, starting from the output layer and +propagating the error signal back to the previous layers. + +**Arguments**: + +- `error` - The error signal from the loss function, typically a NumPy array. + + +**Returns**: + + The updated error signal to be propagated further back in the network during training + (usually not used in the final output layer). (NumPy array) + + + +# nn.fully\_connected + + + +## FullyConnectedLayer Objects + +```python +class FullyConnectedLayer(Layer) +``` + +Fully-connected layer (dense layer) for neural networks. + +This layer performs a linear transformation on the input data followed by a bias addition. +It's a fundamental building block for many neural network architectures. + +During the forward pass, the input data is multiplied by the weight matrix and then added +to the bias vector. The resulting output is passed to the next layer in the network. + +During the backward pass, the gradients are calculated for both the weights and biases +using backpropagation. These gradients are used to update the weights and biases +during training to improve the network's performance. + + + +#### \_\_init\_\_ + +```python +def __init__(input_size, output_size) +``` + +Initializes a fully-connected layer. + +**Arguments**: + +- `input_size` - The number of neurons in the previous layer (the size of the input vector). (int) +- `output_size` - The number of neurons in this layer (the size of the output vector). (int) + + + +#### forward + +```python +def forward(input_data) +``` + +Performs the forward pass through the layer. + +This method calculates the weighted sum of the input data and the bias vector. + +**Arguments**: + +- `input_data` - The input data for the layer, a NumPy array of shape (batch_size, input_size). + + +**Returns**: + + The output of the layer after applying the weights and bias, a NumPy array + of shape (batch_size, output_size). + + + +#### backward + +```python +def backward(upstream_gradients, lr) +``` + +Performs the backward pass for backpropagation in this layer. + +This method calculates the gradients for the weights, biases, and the error signal +to be propagated back to the previous layer. + +**Arguments**: + +- `upstream_gradients` - The gradient signal from the subsequent layer in the network + (a NumPy array of shape (batch_size, output_size)). +- `lr` - The learning rate used for updating the weights and biases during training. (float) + + +**Returns**: + + The gradient signal to be propagated back to the previous layer in the network + (a NumPy array of shape (batch_size, input_size)). + + + +# nn.activation + +This module implements commonly used activation functions for neural networks. + +Activation functions introduce non-linearity into the network, allowing it to learn complex patterns +in the data. This module provides functions for popular activations like ReLU, sigmoid, tanh, etc. + + + +## Tanh Objects + +```python +class Tanh(Layer) +``` + +TanH activation layer for neural networks. + +The Tanh (hyperbolic tangent) activation function introduces non-linearity into the network, +allowing it to learn complex patterns. It maps input values between -1 and 1. 
+
+This class implements the Tanh activation function for the forward and backward passes
+used during neural network training.
+
+
+#### forward
+
+```python
+def forward(input_data: np.ndarray) -> np.ndarray
+```
+
+Performs the forward pass using the Tanh activation function.
+
+**Arguments**:
+
+- `input_data` - A NumPy array representing the input data for this layer.
+
+
+**Returns**:
+
+  A NumPy array containing the output of the Tanh activation function applied to the input data.
+
+
+#### backward
+
+```python
+def backward(output_error: np.ndarray, learning_rate: float) -> np.ndarray
+```
+
+Calculates the gradients for the backward pass using the derivative of Tanh.
+
+**Arguments**:
+
+- `output_error` - The error signal propagated from the subsequent layer during backpropagation.
+  (A NumPy array)
+- `learning_rate` - The learning rate used for updating the weights during training. (float)
+
+**Returns**:
+
+  A NumPy array containing the error signal to be propagated back to the previous layer.
+
diff --git a/documentation/docs/Documentation/Examples.md b/documentation/docs/Documentation/Examples.md
new file mode 100644
index 0000000..007a6f0
--- /dev/null
+++ b/documentation/docs/Documentation/Examples.md
@@ -0,0 +1,16 @@
+# Examples
+
+In this section, you'll find a couple of Jupyter notebooks showcasing the practical usage of LilTorch.
+These notebooks demonstrate how to effectively leverage the library's functionalities for various deep learning tasks (more to be added).
+
+## MNIST Digit Classification with 95% Accuracy
+
+This notebook exemplifies the library's capabilities by achieving 95% accuracy on the classic MNIST handwritten digit classification dataset. The notebook guides you through the process of:
+
+- **Loading the MNIST Dataset:** Learn how to load the MNIST dataset using appropriate libraries within your environment.
+- **Building Your Neural Network:** Discover how to construct a neural network architecture suitable for the MNIST task, taking advantage of the library's building blocks.
+- **Training the Network:** Explore the training process, including defining the loss function and training parameters.
+- **Evaluating Performance:** Witness the evaluation of the trained network's performance on the test set, demonstrating the achieved 95% accuracy.
+
+[Notebook on colab](https://colab.research.google.com/drive/1MLxcA6DC1xxrkY_Ya1zLv-1q7EP0S0Ws?usp=sharing)
+
diff --git a/documentation/docs/images/logo-torch.png b/documentation/docs/images/logo-torch.png
new file mode 100644
index 0000000..0fe3115
Binary files /dev/null and b/documentation/docs/images/logo-torch.png differ
diff --git a/documentation/docs/index.md b/documentation/docs/index.md
index b18e405..a160be1 100644
--- a/documentation/docs/index.md
+++ b/documentation/docs/index.md
@@ -1,5 +1,7 @@
 # LilTorch
+
+
+
+[LilTorch](https://github.com/Mateusmsouza/liltorch) is a lightweight library for Deep Learning, created entirely in Python with Numpy as its sole dependency. This library allows you to design and understand the internals of Neural Networks without the need for C/C++ imported code and binaries. Everything is as understandable as Python itself. -LilTorch is a lightweight library for Deep Learning, created entirely in Python with Numpy as its sole dependency. This library allows you to design and understand the internals of Neural Networks without the need for C/C++ imported code and binaries. Everything is as understandable as Python itself. - -**Note**: This library is intended for educational purposes only. It is not recommended for production use, and execution speed is not a primary focus. \ No newline at end of file +**Note**: This library is intended for educational purposes only. It is not recommended for production use, and execution speed is not a primary focus. diff --git a/documentation/mkdocs.yml b/documentation/mkdocs.yml index 314d061..301d37a 100644 --- a/documentation/mkdocs.yml +++ b/documentation/mkdocs.yml @@ -1 +1,7 @@ -site_name: LilTorch \ No newline at end of file +site_name: LilTorch +theme: + name: mkdocs + color_mode: auto + user_color_mode_toggle: true + analytics: + g-tag: G-2K6L423DZH diff --git a/liltorch/nn/activation.py b/liltorch/nn/activation.py index 1e5f186..ff46203 100644 --- a/liltorch/nn/activation.py +++ b/liltorch/nn/activation.py @@ -1,15 +1,49 @@ +""" +This module implements commonly used activation functions for neural networks. + +Activation functions introduce non-linearity into the network, allowing it to learn complex patterns +in the data. This module provides functions for popular activations like ReLU, sigmoid, tanh, etc. +""" + import numpy as np from liltorch.nn.layer import Layer class Tanh(Layer): + """ + TanH activation layer for neural networks. + + The Tanh (hyperbolic tangent) activation function introduces non-linearity into the network, + allowing it to learn complex patterns. It maps input values between -1 and 1. + + This class implements the Tanh activation function for the forward and backward passes + used during neural network training. + """ - def forward(self, input_data): - '''fordward pass using tanh activation''' + def forward(self, input_data: np.ndarray) -> np.ndarray: + """ + Performs the forward pass using the Tanh activation function. + + Args: + input_data: A NumPy array representing the input data for this layer. + + Returns: + A NumPy array containing the output of the Tanh activation function applied to the input data. + """ self.input = input_data return np.tanh(self.input) - def backward(self, output_error, learning_rate): - '''backward pass using derivate of tanh''' + def backward(self, output_error: np.ndarray, learning_rate: float) -> np.ndarray: + """ + Calculates the gradients for the backward pass using the derivative of Tanh. + + Args: + output_error: The error signal propagated from the subsequent layer during backpropagation. + (A NumPy array) + learning_rate: The learning rate used for updating the weights during training. (float) + + Returns: + A NumPy array containing the error signal to be propagated back to the previous layer. 
+ """ return (1 - np.tanh(self.input) ** 2) * output_error diff --git a/liltorch/nn/fully_connected.py b/liltorch/nn/fully_connected.py index 43d7fd9..11a3948 100644 --- a/liltorch/nn/fully_connected.py +++ b/liltorch/nn/fully_connected.py @@ -4,28 +4,74 @@ class FullyConnectedLayer(Layer): + """ + Fully-connected layer (dense layer) for neural networks. + + This layer performs a linear transformation on the input data followed by a bias addition. + It's a fundamental building block for many neural network architectures. + + During the forward pass, the input data is multiplied by the weight matrix and then added + to the bias vector. The resulting output is passed to the next layer in the network. + + During the backward pass, the gradients are calculated for both the weights and biases + using backpropagation. These gradients are used to update the weights and biases + during training to improve the network's performance. + """ def __init__(self, input_size, output_size): + """ + Initializes a fully-connected layer. + + Args: + input_size: The number of neurons in the previous layer (the size of the input vector). (int) + output_size: The number of neurons in this layer (the size of the output vector). (int) + """ self.weights = np.random.rand(input_size, output_size) - 0.5 self.bias = np.random.rand(1, output_size) - 0.5 def forward(self, input_data): - """ apply input * weigths + bias""" + """ + Performs the forward pass through the layer. + + This method calculates the weighted sum of the input data and the bias vector. + + Args: + input_data: The input data for the layer, a NumPy array of shape (batch_size, input_size). + + Returns: + The output of the layer after applying the weights and bias, a NumPy array + of shape (batch_size, output_size). + """ self.input = input_data self.output = np.dot(self.input, self.weights) + self.bias return self.output def backward(self, upstream_gradients, lr): - # Calculate gradients to propagate to the previous layer (dL/dz[i]) given + """ + Performs the backward pass for backpropagation in this layer. + + This method calculates the gradients for the weights, biases, and the error signal + to be propagated back to the previous layer. + + Args: + upstream_gradients: The gradient signal from the subsequent layer in the network + (a NumPy array of shape (batch_size, output_size)). + lr: The learning rate used for updating the weights and biases during training. (float) + + Returns: + The gradient signal to be propagated back to the previous layer in the network + (a NumPy array of shape (batch_size, input_size)). 
+ """ + # Calculate gradients to propagate to the previous layer (dL/dz[i]) given # a previous layer gradient (dL/dz[i+1]) (which in forward pass would be next layer) downstream_gradients = np.dot(upstream_gradients, self.weights.T) - + # Calculate local gradients for weights and biases (dL/dW and dL/dB ) local_gradients_w = np.dot(self.input.T, upstream_gradients) local_gradients_b = np.sum(upstream_gradients, axis=0, keepdims=True) - + # Update weights and biases using the gradients and learning rate self.weights -= lr * local_gradients_w self.bias -= lr * local_gradients_b - + return downstream_gradients diff --git a/liltorch/nn/layer.py b/liltorch/nn/layer.py index 6ca276d..70b19be 100644 --- a/liltorch/nn/layer.py +++ b/liltorch/nn/layer.py @@ -2,6 +2,7 @@ class Layer: """ This is a abstract class """ + input = None output = None @@ -10,4 +11,3 @@ def foward(self, input): def backward(self, output_error, learning_rate): raise NotImplementedError - diff --git a/liltorch/nn/loss.py b/liltorch/nn/loss.py index 54777f0..6ad2f5a 100644 --- a/liltorch/nn/loss.py +++ b/liltorch/nn/loss.py @@ -1,5 +1,14 @@ +""" +This module implements commonly used loss functions for neural networks. + +Loss functions measure the difference between the model's predictions and the ground truth labels. +Minimizing the loss function during training helps the model learn accurate representations. +This module provides functions for popular loss functions like mean squared error, cross-entropy, etc. +""" + import numpy as np + class MeanSquaredError: """ Class to compute the Mean Squared Error (MSE) and its gradient. @@ -34,4 +43,4 @@ def backward(self, y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray: Returns: np.ndarray: The gradient of the loss with respect to y_pred. """ - return 2 * (y_pred - y_true) / y_true.size \ No newline at end of file + return 2 * (y_pred - y_true) / y_true.size diff --git a/liltorch/nn/network.py b/liltorch/nn/network.py index f66d8b1..52fd54c 100644 --- a/liltorch/nn/network.py +++ b/liltorch/nn/network.py @@ -1,19 +1,68 @@ from liltorch.nn.layer import Layer + class Network: + """ + A basic neural network class for building and training multi-layer networks. + + This class provides a framework for creating and using neural networks with customizable layers. + It supports adding different layer types (inherited from `liltorch.nn.layer.Layer`), performing + forward and backward passes for training, and updating layer weights using the provided learning rate. + """ def __init__(self, lr: float) -> None: + """ + Initializes a new neural network. + + Args: + lr: The learning rate used for updating the weights of the layers during training. (float) + """ self.layers = [] self.lr = lr def add(self, layer: Layer) -> None: + """ + Adds a layer to the neural network. + + This method allows you to build your network by sequentially adding different layer types + (e.g., `Tanh`, `Linear`, etc.) inherited from the `Layer` class. + + Args: + layer: An instance of a layer class from `liltorch.nn.layer`. + """ self.layers.append(layer) - def forward(self, x): + def forward(self, x: np.ndarray) -> np.ndarray: + """ + Performs the forward pass through the network. + + This method propagates the input data (`x`) through all the layers in the network, + applying their respective forward passes sequentially. + + Args: + x: The input data for the network, typically a NumPy array. + + Returns: + The output of the network after passing through all the layers. 
(NumPy array) + """ for layer in self.layers: x = layer.forward(x) return x - def backward(self, error): + def backward(self, error: np.ndarray): + """ + Performs the backward pass for backpropagation. + + This method calculates the gradients for all layers in the network using backpropagation. + It iterates through the layers in reverse order, starting from the output layer and + propagating the error signal back to the previous layers. + + Args: + error: The error signal from the loss function, typically a NumPy array. + + Returns: + The updated error signal to be propagated further back in the network during training + (usually not used in the final output layer). (NumPy array) + """ for layer in reversed(self.layers): - error = layer.backward(error, self.lr) + error = layer.backward(error, self.lr) diff --git a/requirements.dev.txt b/requirements.dev.txt index 233f889..64aba62 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -1,4 +1,5 @@ pytest==8.2.2 -Sphinx==7.3.7 -sphinx-autodoc-typehints==2.2.2 -sphinx-markdown-builder==0.6.6 +mkdocs==1.6.0 +pydoc-markdown==4.8.2 +black>=23.1.0 +coverage==7.5.4 diff --git a/setup.py b/setup.py index 09a1646..6379cde 100644 --- a/setup.py +++ b/setup.py @@ -2,10 +2,11 @@ setup( name='liltorch', - version='0.0.1', - description='Small neural network library made only with raw python', + version='0.0.2', + description='Small neural network library made with numpy and raw python', author='Mateus Souza', packages=find_packages(), + install_requires=['numpy'], long_description=open('README.md').read(), long_description_content_type='text/markdown', )
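Below is a minimal, hypothetical smoke test sketching how the pieces added in this diff fit together: a `Network` assembled from `FullyConnectedLayer` and `Tanh` layers, trained with the gradient from `MeanSquaredError.backward`. The data, layer sizes, learning rate, and epoch count are illustrative only; the loss value is computed directly with NumPy rather than through a `MeanSquaredError` forward call, whose signature is not shown here; and the sketch assumes `numpy` is imported as `np` inside `liltorch/nn/network.py`, since the type hints added there refer to `np.ndarray`.

```python
# Hypothetical end-to-end check of the API added in this PR: fit a tiny
# regression model on random data. All sizes and hyperparameters are arbitrary.
import numpy as np

from liltorch.nn.activation import Tanh
from liltorch.nn.fully_connected import FullyConnectedLayer
from liltorch.nn.loss import MeanSquaredError
from liltorch.nn.network import Network

rng = np.random.default_rng(0)
x = rng.random((32, 4))  # batch of 32 samples with 4 features each
y = rng.random((32, 1))  # matching targets

# Network(lr) stores the learning rate that every layer's backward() receives.
model = Network(lr=0.1)
model.add(FullyConnectedLayer(4, 8))
model.add(Tanh())
model.add(FullyConnectedLayer(8, 1))

loss_fn = MeanSquaredError()

for epoch in range(200):
    # Forward pass: each layer's forward() is applied in the order added.
    y_pred = model.forward(x)

    # dL/dy_pred from the loss; Network.backward() pushes it through the layers
    # in reverse order, and each FullyConnectedLayer updates its own weights
    # and biases using the stored learning rate.
    grad = loss_fn.backward(y, y_pred)
    model.backward(grad)

print("final MSE:", float(np.mean((model.forward(x) - y) ** 2)))
```

Because each layer applies its parameter update inside its own `backward`, there is no separate optimizer object; `Network` only threads the error signal and the learning rate through the layers in reverse order.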