diff --git a/sw/applications/example_im2col/im2colGolden.c b/sw/applications/example_im2col/im2colGolden.c
new file mode 100644
index 000000000..7ffd56bb9
--- /dev/null
+++ b/sw/applications/example_im2col/im2colGolden.c
@@ -0,0 +1,72 @@
+/*
+   Copyright EPFL contributors.
+  Licensed under the Apache License, Version 2.0, see LICENSE for details.
+  SPDX-License-Identifier: Apache-2.0
+
+  Author: Tommaso Terzano <tommaso.terzano@epfl.ch>
+
+  Info: Contains randomly generated input activations and the golden result of the im2col algorithm.
+*/
+
+#include "im2colGolden.h"
+
+const uint32_t input_image_nchw[48] = { /* read as [BATCH][CH][IH][IW]; one image row per line */
+    13932, 24003, 46802,  9895, /* channel 0, row 0 */
+    46807, 33972, 44507,  1507, /* channel 0, row 1 */
+    14638, 51479, 39560, 22725, /* channel 0, row 2 */
+    38212, 35631, 40479, 39503, /* channel 0, row 3 */
+    53705,  5796, 58640, 51585, /* channel 1, row 0 */
+    45069, 32035, 41983, 18828, /* channel 1, row 1 */
+    22247, 54792, 20499,  6640, /* channel 1, row 2 */
+    20565, 25501,  4154,  2925, /* channel 1, row 3 */
+    43660, 10618, 52141, 45092, /* channel 2, row 0 */
+    46500, 63085, 57079, 16974, /* channel 2, row 1 */
+    52033, 46977, 35992,  6933, /* channel 2, row 2 */
+     3158, 21127, 28588, 61815  /* channel 2, row 3 */
+};
+
+const uint32_t golden_im2col_nchw[108] = { /* 12 rows of 9 values: one row per (channel, kernel row, kernel column), one column per patch */
+        0,     0,     0,     0, 33972,  1507,     0, 35631, 39503, /* ch 0, kernel (0,0) */
+        0,     0,     0, 46807, 44507,     0, 38212, 40479,     0, /* ch 0, kernel (0,1) */
+        0, 24003,  9895,     0, 51479, 22725,     0,     0,     0, /* ch 0, kernel (1,0) */
+    13932, 46802,     0, 14638, 39560,     0,     0,     0,     0, /* ch 0, kernel (1,1) */
+        0,     0,     0,     0, 32035, 18828,     0, 25501,  2925, /* ch 1, kernel (0,0) */
+        0,     0,     0, 45069, 41983,     0, 20565,  4154,     0, /* ch 1, kernel (0,1) */
+        0,  5796, 51585,     0, 54792,  6640,     0,     0,     0, /* ch 1, kernel (1,0) */
+    53705, 58640,     0, 22247, 20499,     0,     0,     0,     0, /* ch 1, kernel (1,1) */
+        0,     0,     0,     0, 63085, 16974,     0, 21127, 61815, /* ch 2, kernel (0,0) */
+        0,     0,     0, 46500, 57079,     0,  3158, 28588,     0, /* ch 2, kernel (0,1) */
+        0, 10618, 45092,     0, 46977,  6933,     0,     0,     0, /* ch 2, kernel (1,0) */
+    43660, 52141,     0, 52033, 35992,     0,     0,     0,     0  /* ch 2, kernel (1,1) */
+};
+
+const uint32_t input_image_nhwc[48] = { /* read as [BATCH][IH][IW][CH]; the CH values of one pixel per line */
+     4047, 16986, 10416, /* row 0, col 0 */
+    22393, 36967, 57252, /* row 0, col 1 */
+    30217, 40720, 42651, /* row 0, col 2 */
+     3810,  4754, 56157, /* row 0, col 3 */
+    44724, 26083,  1010, /* row 1, col 0 */
+    44426, 14005, 35222, /* row 1, col 1 */
+    47712,  1887,    65, /* row 1, col 2 */
+    37412, 50137,  2236, /* row 1, col 3 */
+     7582, 53150, 12696, /* row 2, col 0 */
+    24415, 40340, 26558, /* row 2, col 1 */
+    22643, 14656,  7085, /* row 2, col 2 */
+      804, 32415, 17930, /* row 2, col 3 */
+    47706,  3314,  2947, /* row 3, col 0 */
+    19673, 37744, 24015, /* row 3, col 1 */
+    55137,  1975, 54009, /* row 3, col 2 */
+    25888, 50886, 35445  /* row 3, col 3 */
+};
+
+const uint32_t golden_im2col_nhwc[108] = { /* 9 rows of 12 values: one row per patch, one column per (channel, kernel row, kernel column) */
+        0,     0,     0,  4047,     0,     0,     0, 16986,     0,     0,     0, 10416, /* patch (0,0) */
+        0,     0, 22393, 30217,     0,     0, 36967, 40720,     0,     0, 57252, 42651, /* patch (0,1) */
+        0,     0,  3810,     0,     0,     0,  4754,     0,     0,     0, 56157,     0, /* patch (0,2) */
+        0, 44724,     0,  7582,     0, 26083,     0, 53150,     0,  1010,     0, 12696, /* patch (1,0) */
+    44426, 47712, 24415, 22643, 14005,  1887, 40340, 14656, 35222,    65, 26558,  7085, /* patch (1,1) */
+    37412,     0,   804,     0, 50137,     0, 32415,     0,  2236,     0, 17930,     0, /* patch (1,2) */
+        0, 47706,     0,     0,     0,  3314,     0,     0,     0,  2947,     0,     0, /* patch (2,0) */
+    19673, 55137,     0,     0, 37744,  1975,     0,     0, 24015, 54009,     0,     0, /* patch (2,1) */
+    25888,     0,     0,     0, 50886,     0,     0,     0, 35445,     0,     0,     0  /* patch (2,2) */
+};
diff --git a/sw/applications/example_im2col/im2colGolden.h b/sw/applications/example_im2col/im2colGolden.h
new file mode 100644
index 000000000..27083aad1
--- /dev/null
+++ b/sw/applications/example_im2col/im2colGolden.h
@@ -0,0 +1,31 @@
+/*
+   Copyright EPFL contributors.
+  Licensed under the Apache License, Version 2.0, see LICENSE for details.
+  SPDX-License-Identifier: Apache-2.0
+
+  Author: Tommaso Terzano <tommaso.terzano@epfl.ch>
+
+  Info: Header file of im2colGolden.c; contains the activation parameters and the declarations of the input tensors and the golden outputs.
+*/
+
+#ifndef IMAGE_AND_COL_H
+#define IMAGE_AND_COL_H
+
+#include <stdint.h>
+
+// Parameters of the im2col test case; the arrays declared below are sized for these values
+#define IW 4 // Input width: valid image columns are 0 .. IW-1
+#define IH 4 // Input height: valid image rows are 0 .. IH-1
+#define CH 3 // Number of input channels
+#define FW 2 // Filter (kernel) width
+#define FH 2 // Filter (kernel) height
+#define STRIDES 2 // Step between consecutive patches, used for both rows and columns
+#define PAD 1 // Zero padding applied on every side of the image
+#define BATCH 1 // Number of images in the batch
+
+extern const uint32_t input_image_nchw[48]; // Input tensor, NCHW layout
+extern const uint32_t golden_im2col_nchw[108]; // Expected im2col result for the NCHW input
+extern const uint32_t input_image_nhwc[48]; // Input tensor, NHWC layout
+extern const uint32_t golden_im2col_nhwc[108]; // Expected im2col result for the NHWC input
+
+#endif
diff --git a/sw/applications/example_im2col/im2col_lib.c b/sw/applications/example_im2col/im2col_lib.c
new file mode 100644
index 000000000..ae4d11604
--- /dev/null
+++ b/sw/applications/example_im2col/im2col_lib.c
@@ -0,0 +1,211 @@
+/*
+    Copyright EPFL contributors.
+    Licensed under the Apache License, Version 2.0, see LICENSE for details.
+    SPDX-License-Identifier: Apache-2.0
+
+    Author: Tommaso Terzano <tommaso.terzano@epfl.ch>
+
+    Info: im2col_lib.c describes functions used to calculate im2col and verify it using 
+    the golden result in im2colGolden.c.
+
+    Notes: the im2col_nchw_int32() and im2col_nhwc_int32() algorithms are inspired by the SHL library, developed by T-HEAD Semi.
+    For reference, check out the following link:
+    https://github.com/T-head-Semi/csi-nn2/blob/main/source/reference/im2col.c
+*/
+
+#include "im2col_lib.h"
+
+int output_data[OH_NCHW*OW_NCHW]; // Result buffer shared by both im2col variants and read back by verify()
+
+/*
+ * Software im2col for the NCHW layout: expands input_image_nchw into output_data.
+ *
+ * Element (c, b, h, w) of the result is stored at the flattened index
+ * ((c * BATCH + b) * N_PATCHES_H + h) * N_PATCHES_W + w, where c selects the
+ * channel and the position inside the kernel window and (h, w) selects the patch.
+ * Positions that fall into the padding area are written as 0.
+ *
+ * Returns 0; the function has no failure path.
+ */
+int im2col_nchw_int32(void)
+{
+    PRINTF("OH: %d, OW: %d\n", OH_NCHW, OW_NCHW);
+
+    // Iterate over each row of the output matrix.
+    for (int c = 0; c < CH_COL; ++c) {
+        // Position inside the kernel window and source channel selected by row c.
+        const int w_offset = c % FW;        // offset along IW
+        const int h_offset = (c / FW) % FH; // offset along IH
+        const int im_c = c / (FH * FW);     // input channel
+
+        // Iterate over each image of the batch.
+        for (int b = 0; b < BATCH; ++b) {
+            // Iterate over the patches along the height of the input.
+            for (int h = 0; h < N_PATCHES_H; ++h) {
+                // The input row only depends on h, so it is computed once per row of patches.
+                const int im_row = h_offset + h * STRIDES - PAD;
+
+                // Iterate over the patches along the width of the input.
+                for (int w = 0; w < N_PATCHES_W; ++w) {
+                    const int im_col = w_offset + w * STRIDES - PAD;
+
+                    // Index in the flattened output array where this value is stored.
+                    const int col_index = ((c * BATCH + b) * N_PATCHES_H + h) * N_PATCHES_W + w;
+
+                    // Coordinates outside the image belong to the padding and produce a 0,
+                    // everything else is copied from the input image.
+                    if (im_row < 0 || im_col < 0 || im_row >= IH || im_col >= IW) {
+                        output_data[col_index] = 0;
+                    } else {
+                        output_data[col_index] = input_image_nchw[get_index(CH, IH, IW, b, im_c, im_row, im_col)];
+                    }
+                }
+            }
+        }
+    }
+
+    PRINTF("Final output matrix:\n\n");
+
+    // Debug dump of the result, laid out with the NCHW output dimensions.
+    #if DEBUG
+    for (int i = 0; i < OH_NCHW; i++)
+    {
+        for (int j = 0; j < OW_NCHW; j++)
+        {
+            PRINTF("%d ", output_data[i * (OW_NCHW) + j]);
+        }
+        PRINTF("\n");
+    }
+    #endif
+
+    // Return a 0 to indicate a success
+    return 0;
+}
+
+/*
+ * Software im2col for the NHWC layout: expands input_image_nhwc into output_data.
+ *
+ * Element (b, h, w, c) of the result is stored at the flattened index
+ * ((b * N_PATCHES_H + h) * N_PATCHES_W + w) * CH_COL + c, where (h, w) selects
+ * the patch and c selects the channel and the position inside the kernel window.
+ * Each patch therefore occupies CH_COL consecutive output values.
+ * Positions that fall into the padding area are written as 0.
+ *
+ * Returns 0; the function has no failure path.
+ */
+int im2col_nhwc_int32(void)
+{
+    PRINTF("OH: %d, OW: %d\n", OH_NHWC, OW_NHWC);
+
+    // Iterate over each image of the batch.
+    for (int b = 0; b < BATCH; ++b) {
+        // Iterate over the patches along the height of the input.
+        for (int h = 0; h < N_PATCHES_H; ++h) {
+            // Iterate over the patches along the width of the input.
+            for (int w = 0; w < N_PATCHES_W; ++w) {
+                // Iterate over the CH_COL values that make up one patch.
+                for (int c = 0; c < CH_COL; ++c) {
+                    // Position inside the kernel window and source channel selected by c.
+                    const int w_offset = c % FW;        // offset along IW
+                    const int h_offset = (c / FW) % FH; // offset along IH
+                    const int im_c = c / (FH * FW);     // input channel
+
+                    // Row and column in the input image, after applying stride and padding.
+                    const int im_row = h_offset + h * STRIDES - PAD;
+                    const int im_col = w_offset + w * STRIDES - PAD;
+
+                    // Index in the flattened output array where this value is stored.
+                    const int col_index = ((b * N_PATCHES_H + h) * N_PATCHES_W + w) * CH_COL + c;
+
+                    // Coordinates outside the image belong to the padding and produce a 0,
+                    // everything else is copied from the input image.
+                    if (im_row < 0 || im_col < 0 || im_row >= IH || im_col >= IW) {
+                        output_data[col_index] = 0;
+                    } else {
+                        output_data[col_index] = input_image_nhwc[get_index(IH, IW, CH, b, im_row, im_col, im_c)];
+                    }
+                }
+            }
+        }
+    }
+
+    PRINTF("Final output matrix:\n\n");
+
+    // Debug dump of the result, laid out with the NHWC output dimensions.
+    #if DEBUG
+    for (int i = 0; i < OH_NHWC; i++)
+    {
+        for (int j = 0; j < OW_NHWC; j++)
+        {
+            PRINTF("%d ", output_data[i * (OW_NHWC) + j]);
+        }
+        PRINTF("\n");
+    }
+    #endif
+
+    // Return a 0 to indicate a success
+    return 0;
+}
+
+
+// Row-major offset of element [index0][index1][index2][index3]; dim1..dim3 are the sizes of the three inner dimensions.
+int get_index(int dim1, int dim2, int dim3, int index0, int index1, int index2, int index3) {
+    const int outer = index0 * dim1 + index1;  // fold the first two indices
+    return (outer * dim2 + index2) * dim3 + index3;
+}
+
+/*
+ * Verifies the im2col using golden values generated by "verification_script.py".
+ *
+ * format == 0 compares output_data with golden_im2col_nchw, any other value
+ * compares it with golden_im2col_nhwc. Every mismatch is reported through PRINTF.
+ *
+ * Returns the number of mismatching elements (0 means the result is correct).
+ */
+int verify(int format)
+{
+    // Both layouts share one comparison loop; only the reference and its shape differ.
+    const uint32_t *golden = (format == 0) ? golden_im2col_nchw : golden_im2col_nhwc;
+    const int rows = (format == 0) ? (OH_NCHW) : (OH_NHWC);
+    const int cols = (format == 0) ? (OW_NCHW) : (OW_NHWC);
+    int errors = 0;
+
+    for (int i = 0; i < rows; i++)
+    {
+        for (int j = 0; j < cols; j++)
+        {
+            // idx is the flattened position of element (i, j).
+            const int idx = i * cols + j;
+
+            // output_data holds int values: convert explicitly to the golden's unsigned type.
+            if (golden[idx] != (uint32_t)output_data[idx])
+            {
+                // Cast the golden value so that every argument matches its %d conversion.
+                PRINTF("ERROR: Golden: %d, Output: %d, at %d %d\n", (int)golden[idx], output_data[idx], i, j);
+                errors ++;
+            }
+        }
+    }
+
+    return errors;
+}
+
+/*
+ * Validates (passing DMA_ENABLE_REALIGN and DMA_PERFORM_CHECKS_INTEGRITY), loads and
+ * launches the DMA transaction, then sleeps until dma_is_ready() reports completion.
+ * NOTE(review): the values returned by the three DMA calls are discarded, so a
+ * rejected transaction is not reported to the caller.
+ */
+void dma_run(dma_trans_t * trans)
+{
+    (void)dma_validate_transaction(trans, DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY );
+    (void)dma_load_transaction(trans);
+    (void)dma_launch(trans);
+    while (dma_is_ready() == 0) {
+        // Interrupts are disabled around the check; the core still wakes up because that is controlled by the MIP register.
+        CSR_CLEAR_BITS(CSR_REG_MSTATUS, 0x8);
+        // Re-test before sleeping; after the wake-up execution resumes here even if no ISR was entered.
+        if (!dma_is_ready()) { wait_for_interrupt(); }
+        CSR_SET_BITS(CSR_REG_MSTATUS, 0x8);
+    }
+}
diff --git a/sw/applications/example_im2col/im2col_lib.h b/sw/applications/example_im2col/im2col_lib.h
new file mode 100644
index 000000000..2f5719494
--- /dev/null
+++ b/sw/applications/example_im2col/im2col_lib.h
@@ -0,0 +1,74 @@
+/*
+    Copyright EPFL contributors.
+    Licensed under the Apache License, Version 2.0, see LICENSE for details.
+    SPDX-License-Identifier: Apache-2.0
+
+    Author: Tommaso Terzano <tommaso.terzano@epfl.ch>
+    
+    Info: Header file of im2col_lib.c, containing the function prototypes, parameters macros and the configuration of prints and performance analysis.
+*/
+
+#ifndef _IM2COL_
+#define _IM2COL_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include "im2colGolden.h"
+#include "dma.h"
+#include "core_v_mini_mcu.h"
+#include "x-heep.h"
+#include "rv_plic.h"
+#include "csr.h"
+
+// By default, printfs are activated for FPGA and for simulation.
+#define PRINTF_IN_FPGA  1
+#define PRINTF_IN_SIM   1
+#define DEBUG 0 // Set to 1 to enable debug prints
+#define TIMING 0 // Set to 1 to enable timing measurements
+
+// The layout selectors passed to verify() (NCHW_FORMAT / NHWC_FORMAT) are defined in main.c
+
+#if TARGET_SIM && PRINTF_IN_SIM // Simulation: only the plain PRINTF produces output
+    #define PRINTF(fmt, ...)    printf(fmt, ## __VA_ARGS__)
+    #define PRINTF_DEB(...) // NOTE(review): empty even when DEBUG is 1 - confirm this is intended
+    #define PRINTF_TIM(...)   // NOTE(review): empty even when TIMING is 1 - confirm this is intended
+#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA // FPGA: debug and timing prints follow DEBUG and TIMING
+    #define PRINTF(fmt, ...)    printf(fmt, ## __VA_ARGS__)
+    #if DEBUG
+        #define PRINTF_DEB(fmt, ...)    printf(fmt, ## __VA_ARGS__)
+    #else
+        #define PRINTF_DEB(...)
+    #endif
+    #if TIMING
+        #define PRINTF_TIM(fmt, ...)    printf(fmt, ## __VA_ARGS__)
+    #else
+        #define PRINTF_TIM(...)
+    #endif
+#else // Any other configuration: every print macro expands to nothing
+    #define PRINTF(...)
+    #define PRINTF_DEB(...)
+    #define PRINTF_TIM(...)
+#endif
+
+// Dimensions derived from the parameters in im2colGolden.h
+// Number of patches (filter positions) along the height and the width of the padded input
+#define N_PATCHES_H ((IH + (PAD + PAD) - FH)/ STRIDES + 1)
+#define N_PATCHES_W ((IW + (PAD + PAD) - FW)/ STRIDES + 1)
+// Number of input values covered by one filter position
+#define CH_COL (CH * FH * FW)
+// Height and width of the NCHW output matrix
+#define OH_NCHW (CH * FH * FW * BATCH)
+#define OW_NCHW ((N_PATCHES_H) * (N_PATCHES_W)) // fully parenthesized so it expands safely inside larger expressions
+// Width and height of the NHWC output matrix
+#define OW_NHWC (FW * FH * CH * BATCH)
+#define OH_NHWC ((N_PATCHES_W) * (N_PATCHES_H)) // fully parenthesized so it expands safely inside larger expressions
+
+int im2col_nchw_int32(void); // Fills output_data with the im2col of input_image_nchw; returns 0
+int im2col_nhwc_int32(void); // Fills output_data with the im2col of input_image_nhwc; returns 0
+// Flattened row-major offset of [index0][index1][index2][index3]; dim1..dim3 are the three inner dimension sizes
+int get_index(int dim1, int dim2, int dim3, int index0, int index1, int index2, int index3);
+// Counts the elements of output_data that differ from the golden result (format 0: NCHW, otherwise NHWC)
+int verify(int format);
+
+#endif
\ No newline at end of file
diff --git a/sw/applications/example_im2col/main.c b/sw/applications/example_im2col/main.c
new file mode 100644
index 000000000..a22c8bf8d
--- /dev/null
+++ b/sw/applications/example_im2col/main.c
@@ -0,0 +1,81 @@
+/*
+    Copyright EPFL contributors.
+    Licensed under the Apache License, Version 2.0, see LICENSE for details.
+    SPDX-License-Identifier: Apache-2.0
+
+    Author: Tommaso Terzano <tommaso.terzano@epfl.ch>
+    
+    Info: Example application of im2col algorithm with configurable format, verification and performance analysis.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include "x-heep.h"
+#include "im2col_lib.h"
+
+#define NCHW_FORMAT 0 // Passed to verify(): compare against the NCHW golden result
+#define NHWC_FORMAT 1 // Passed to verify(): compare against the NHWC golden result
+
+/*
+ * Runs the im2col test for the NCHW and then for the NHWC layout and checks
+ * each result against its golden reference.
+ * Returns 0 when both tests pass and 1 as soon as one of them fails.
+ */
+int main(void)
+{
+    PRINTF("\nStarting test...\n\n");
+
+    int errors;
+    // Only written when TIMING is set; initialized and marked as used so builds without timing prints stay warning-free.
+    unsigned int cycles = 0;
+    (void)cycles;
+
+    #if TIMING
+        CSR_CLEAR_BITS(CSR_REG_MCOUNTINHIBIT, 0x1);
+        CSR_WRITE(CSR_REG_MCYCLE, 0);
+    #endif
+
+    im2col_nchw_int32(); // Execute the im2col algorithm with NCHW format
+
+    #if TIMING
+        CSR_READ(CSR_REG_MCYCLE, &cycles);
+    #endif
+
+    errors = verify(NCHW_FORMAT);
+
+    PRINTF("im2col NCHW test executed\n");
+    PRINTF_TIM("Total number of cycles: [%u]\n\n", cycles); // cycles is unsigned, hence %u
+
+    if (errors != 0)
+    {
+        PRINTF("TEST FAILED: %d errors\n", errors);
+        return 1;
+    }
+    PRINTF("TEST PASSED!\n");
+
+    #if TIMING
+        CSR_CLEAR_BITS(CSR_REG_MCOUNTINHIBIT, 0x1);
+        CSR_WRITE(CSR_REG_MCYCLE, 0);
+    #endif
+
+    im2col_nhwc_int32(); // Execute the im2col algorithm with NHWC format
+
+    #if TIMING
+        CSR_READ(CSR_REG_MCYCLE, &cycles);
+    #endif
+
+    errors = verify(NHWC_FORMAT);
+
+    PRINTF("im2col NHWC test executed\n");
+    PRINTF_TIM("Total number of cycles: [%u]\n\n", cycles); // cycles is unsigned, hence %u
+
+    if (errors != 0)
+    {
+        PRINTF("TEST FAILED: %d errors\n", errors);
+        return 1;
+    }
+    PRINTF("TEST PASSED!\n");
+
+    return 0;
+}