diff --git a/.gitignore b/.gitignore index 61ea876..091ffe9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,8 @@ .DS_Store *.vscode venv/ -gen/ \ No newline at end of file +gen/ +bd*/ +logs +**/__pycache__/ +openocd.cfg \ No newline at end of file diff --git a/Makefile b/Makefile deleted file mode 100644 index f2d526d..0000000 --- a/Makefile +++ /dev/null @@ -1,68 +0,0 @@ -TEST ?=hello -TARGET ?=native -TOOL ?=gcc -OPT ?=speed -RUNDIR ?=/Users/$(USER)/STM32CubeIDE/workspace/embench_dsp/Debug/. - -ROOT :=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) - -TST_DIR =$(ROOT)/tests/$(TEST) -TGT_DIR =$(ROOT)/targets/$(TARGET) -CMN_DIR =$(ROOT)/common - -include $(TST_DIR)/test.mk -include $(TGT_DIR)/target.mk - -SRC =$(TGT_SRC) $(TST_SRC) $(ROOT)/main.c -OBJ =$(TGT_OBJ) $(TST_OBJ) $(patsubst %.c,%.o, $(notdir $(ROOT)/main.c)) -INC =$(TGT_INC) $(TST_INC) - -ifeq ($(TOOL),gcc) - CC =gcc - OBJDUMP =objdump - EXE =embench_dsp - SZ =size - CC_FLG =$(TGT_FLG) -g3 -ffunction-sections -fdata-sections -Wdouble-promotion $(TST_FLG) - SZ_FLG = -else ifeq ($(TOOL),armgcc) - CC =arm-none-eabi-gcc - OBJDUMP =arm-none-eabi-objdump - SZ =arm-none-eabi-size - EXE =embench_dsp.elf - CC_FLG =$(TGT_FLG) -g3 -ffunction-sections -fdata-sections -fsingle-precision-constant -Wdouble-promotion $(TST_FLG) - LD_FLG =-Wl,--gc-sections -static -Wl,--start-group -lc -lm -Wl,--end-group --specs=nano.specs $(TGT_LD) - SZ_FLG =--format=GNU -endif - -ifeq ($(OPT),speed) - CC_FLG +=-O2 -else ifeq ($(OPT),size) - CC_FLG +=-Os -endif - -rebuild: clean build - -clean: - rm -rf gen - -setup: - mkdir -p gen - -compile: setup - cd gen && $(CC) $(CC_FLG) $(INC) -c $(SRC) - -link: - cd gen && $(CC) $(CC_FLG) $(LD_FLG) $(OBJ) -o $(EXE) - -disassemble: - cd gen && $(OBJDUMP) -h -S $(EXE) > embench_dsp.lst - -size: - cd gen && $(SZ) $(SZ_FLG) $(EXE) > embench_dsp.size - -build: compile link disassemble size - cd gen && cp embench_dsp* $(RUNDIR) - -# native build/run only -run: clean compile link disassemble size - ./gen/embench_dsp \ No newline at end of file diff --git a/baseline-data/size.json b/baseline-data/size.json new file mode 100644 index 0000000..64e9b1b --- /dev/null +++ b/baseline-data/size.json @@ -0,0 +1,50 @@ +{ + "biquad_cascade_df2T_f32_sos3_n1" : { + "text" : 30228, + "rodata" : 1056, + "data" : 2212, + "bss" : 412 + }, + "biquad_cascade_df2T_f32_sos3_n128" : { + "text" : 30228, + "rodata" : 1056, + "data" : 3228, + "bss" : 924 + }, + "dct4_2048_f32" : { + "text" : 34012, + "rodata" : 45648, + "data" : 18100, + "bss" : 16772 + }, + "dct4_512_f32" : { + "text" : 34012, + "rodata" : 12208, + "data" : 5812, + "bss" : 4484 + }, + "fir_f32_taps256_n1" : { + "text" : 30380, + "rodata" : 1048, + "data" : 3772, + "bss" : 1412 + }, + "fir_f32_taps256_n128" : { + "text" : 30380, + "rodata" : 1048, + "data" : 4788, + "bss" : 2428 + }, + "rfft2048_f32" : { + "text" : 33620, + "rodata" : 21048, + "data" : 18100, + "bss" : 8580 + }, + "rfft512_f32" : { + "text" : 33620, + "rodata" : 6040, + "data" : 5812, + "bss" : 2436 + } +} diff --git a/baseline.csv b/baseline.csv deleted file mode 100644 index addfb85..0000000 --- a/baseline.csv +++ /dev/null @@ -1,17 +0,0 @@ -opt,test,config,cycles,size -speed,biquad_cascade_df2T_f32,sos3_n128,8163,5692 -speed,biquad_cascade_df2T_f32,sos3_n1,162,5692 -speed,dct4_512_f32,default,72584,9780 -speed,dct4_2048_f32,default,308902,9780 -speed,fir_f32,taps256_n128,298420,5916 -speed,fir_f32,taps256_n1,3908,5916 -speed,rfft512_f32,default,34156,9356 -speed,rfft2048_f32,default,155556,9356 
-size,biquad_cascade_df2T_f32,sos3_n128,8944,5608 -size,biquad_cascade_df2T_f32,sos3_n1,181,5608 -size,dct4_512_f32,default,79958,9440 -size,dct4_2048_f32,default,334310,9440 -size,fir_f32,taps256_n128,430262,5820 -size,fir_f32,taps256_n1,5194,5816 -size,rfft512_f32,default,36353,9048 -size,rfft2048_f32,default,160202,9048 \ No newline at end of file diff --git a/baseline.txt b/baseline.txt deleted file mode 100644 index e8524b6..0000000 --- a/baseline.txt +++ /dev/null @@ -1,35 +0,0 @@ -|-------|-------------------------|--------------|--------|------| -| opt | test | config | cycles | size | -|-------|-------------------------|--------------|--------|------| -| speed | biquad_cascade_df2T_f32 | sos3_n128 | 8163 | 5692 | -|-------|-------------------------|--------------|--------|------| -| speed | biquad_cascade_df2T_f32 | sos3_n1 | 162 | 5692 | -|-------|-------------------------|--------------|--------|------| -| speed | dct4_512_f32 | default | 72584 | 9780 | -|-------|-------------------------|--------------|--------|------| -| speed | dct4_2048_f32 | default | 308902 | 9780 | -|-------|-------------------------|--------------|--------|------| -| speed | fir_f32 | taps256_n128 | 298420 | 5916 | -|-------|-------------------------|--------------|--------|------| -| speed | fir_f32 | taps256_n1 | 3908 | 5916 | -|-------|-------------------------|--------------|--------|------| -| speed | rfft512_f32 | default | 34156 | 9356 | -|-------|-------------------------|--------------|--------|------| -| speed | rfft2048_f32 | default | 155556 | 9356 | -|-------|-------------------------|--------------|--------|------| -| size | biquad_cascade_df2T_f32 | sos3_n128 | 8944 | 5608 | -|-------|-------------------------|--------------|--------|------| -| size | biquad_cascade_df2T_f32 | sos3_n1 | 181 | 5608 | -|-------|-------------------------|--------------|--------|------| -| size | dct4_512_f32 | default | 79958 | 9440 | -|-------|-------------------------|--------------|--------|------| -| size | dct4_2048_f32 | default | 334310 | 9440 | -|-------|-------------------------|--------------|--------|------| -| size | fir_f32 | taps256_n128 | 430262 | 5820 | -|-------|-------------------------|--------------|--------|------| -| size | fir_f32 | taps256_n1 | 5194 | 5816 | -|-------|-------------------------|--------------|--------|------| -| size | rfft512_f32 | default | 36353 | 9048 | -|-------|-------------------------|--------------|--------|------| -| size | rfft2048_f32 | default | 160202 | 9048 | -|-------|-------------------------|--------------|--------|------| diff --git a/benchmark_size.py b/benchmark_size.py new file mode 100755 index 0000000..49f65e2 --- /dev/null +++ b/benchmark_size.py @@ -0,0 +1,591 @@ +#!/usr/bin/env python3 + +# Script to benchmark size + +# Copyright (C) 2017, 2019 Embecosm Limited +# Copyright (C) 2021 Roger Shepherd +# +# Contributor: Graham Markall +# Contributor: Jeremy Bennett +# Contributor: Roger Shepherd +# Contributor: Konrad Moron +# +# This file is part of Embench. + +# SPDX-License-Identifier: GPL-3.0-or-later +"""Compute the size benchmark for a set of compiled Embench programs. + +This script only handles elf format files as they are the standard executable +format for microprocessors. + +Categories of sections + +This script is concerned with 4 categories of section. These sections are +associated by their ELF flags and ELF type. 
The default associations are: + + category of section Flags Type + ------------------- ----- ---- + executable code AX PROGBITS + non-zero initialized writable data AW or AWX PROGBITS + read only data A PROGBITS + zero initialized writable data (BSS) AW or AWX NOBITS + +""" + +import argparse +import os +import sys +import platform + +from json import loads +from elftools.elf import elffile as elf +from elftools.elf.constants import SH_FLAGS as FLAGS + +sys.path.append( + os.path.join(os.path.abspath(os.path.dirname(__file__)), 'pylib')) + +from embench_core import check_python_version +from embench_core import log +from embench_core import gp +from embench_core import setup_logging +from embench_core import log_args +from embench_core import find_benchmarks +from embench_core import log_benchmarks +from embench_core import embench_stats +from embench_core import output_format + +# the default section flags and types are used both in validate_args and in +# collect_data. +DEFAULT_FLAGS_ELF = { + 'text': ({int(FLAGS.SHF_ALLOC | FLAGS.SHF_EXECINSTR)}, 'SHT_PROGBITS'), + 'rodata': ({int(FLAGS.SHF_ALLOC)}, 'SHT_PROGBITS'), + 'data': ({ + int(FLAGS.SHF_ALLOC | FLAGS.SHF_WRITE), + int(FLAGS.SHF_ALLOC | FLAGS.SHF_WRITE | FLAGS.SHF_EXECINSTR) + }, 'SHT_PROGBITS'), + 'bss': ({ + int(FLAGS.SHF_ALLOC | FLAGS.SHF_WRITE), + int(FLAGS.SHF_ALLOC | FLAGS.SHF_WRITE | FLAGS.SHF_EXECINSTR) + }, 'SHT_NOBITS'), +} + +DEFAULT_SECNAMELIST_DICT = { + 'elf': DEFAULT_FLAGS_ELF, +} +""" +Metrics + +The script reports a metric which is the sum of the sizes of a number of the +categories of sections. By default the metric reported is executable code (text) +category. This can be overridden using the `—metric` parameter which takes the space +separated list of categories to be included in the metric. 
+""" +# the categories and the metrics happen to be the same; they could be different +ALL_CATEGORIES = ['text', 'rodata', 'data', 'bss'] +ALL_METRICS = ['text', 'rodata', 'data', 'bss'] + + +def build_parser(): + """Build a parser for all the arguments""" + parser = argparse.ArgumentParser(description='Compute the size benchmark') + + parser.add_argument( + '--builddir', + type=str, + default='bd', + help='Directory holding all the binaries', + ) + parser.add_argument( + '--logdir', + type=str, + default='logs', + help='Directory in which to store logs', + ) + parser.add_argument( + '--baselinedir', + type=str, + default='baseline-data', + help='Directory which contains baseline data', + ) + parser.add_argument( + '--absolute', + action='store_true', + help='Specify to show absolute results', + ) + parser.add_argument( + '--relative', + dest='absolute', + action='store_false', + help='Specify to show relative results (the default)', + ) + parser.add_argument( + '--json-output', + dest='output_format', + action='store_const', + const=output_format.JSON, + help='Specify to output in JSON format', + ) + parser.add_argument( + '--text-output', + dest='output_format', + action='store_const', + const=output_format.TEXT, + help='Specify to output as plain text (the default)', + ) + parser.add_argument( + '--md-output', + dest='output_format', + action='store_const', + const=output_format.MD, + help='Specify to output as MarkDown', + ) + parser.add_argument( + '--csv-output', + dest='output_format', + action='store_const', + const=output_format.CSV, + help='Specify to output as CSV', + ) + parser.add_argument( + '--baseline-output', + dest='output_format', + action='store_const', + const=output_format.BASELINE, + help='Specify to output in a format suitable for use as a baseline') + parser.add_argument( + '--json-comma', + action='store_true', + help='Specify to append a comma to the JSON output', + ) + parser.add_argument( + '--no-json-comma', + dest='json_comma', + action='store_false', + help='Specify to not append a comma to the JSON output', + ) + parser.add_argument( + '--dummy-benchmark', + help='Dummy benchmark to measure library size overhead', + default={}) + # List arguments are empty by default, a user specified value then takes + # precedence. If the list is empty after parsing, then we can install a + # default value. + parser.add_argument( + '--metric', + type=str, + default=[], + nargs='+', + choices=ALL_METRICS, + action='extend', + help= + 'Section categories to include in metric: one or more of "text", "rodata", ' + + 'or "data". Default "text"', + ) + parser.add_argument( + '--file-extension', + type=str, + default=None, + help= + 'Optional file extension to append to bench mark names when searching for binaries.' 
+ ) + + return parser + + +def validate_build_dir(args): + """Check that we have a valid build directory and update the gp dictionary + accordingly.""" + if os.path.isabs(args.builddir): + gp['bd'] = args.builddir + else: + gp['bd'] = os.path.join(gp['rootdir'], args.builddir) + + if not os.path.isdir(gp['bd']): + log.error(f'ERROR: build directory {gp["bd"]} not found: exiting') + sys.exit(1) + + if not os.access(gp['bd'], os.R_OK): + log.error(f'ERROR: Unable to read build directory {gp["bd"]}: exiting') + sys.exit(1) + + +def validate_baseline_dir(args): + """Set up the appropriate baseline directory.""" + if os.path.isabs(args.baselinedir): + gp['baseline_dir'] = args.baselinedir + else: + gp['baseline_dir'] = os.path.join(gp['rootdir'], args.baselinedir) + + +def validate_output_format(args): + """Set up the output format.""" + if args.output_format: + gp['output_format'] = args.output_format + else: + gp['output_format'] = output_format.TEXT + + +def validate_metric(args): + """Set up the metric(s) to be used. If no categories are specified, we + just use text.""" + if args.metric: + gp['metric'] = args.metric + else: + gp['metric'] = ['text'] + + +def validate_dummy_bm(args): + """Set up the dummy benchmark to use, with a default if none specified.""" + if args.dummy_benchmark: + gp['dummy_benchmark'] = args.dummy_benchmark + else: + gp['dummy_benchmark'] = "dummy-benchmark" + + +def validate_file_ext(args): + """Set up the extension to be used with executables, using a default if + none is specified.""" + if args.file_extension is None: + if platform.system() == 'Windows': + gp['file_extension'] = '.exe' + else: + gp['file_extension'] = '' + else: + gp['file_extension'] = args.file_extension + + +def validate_args(args): + """Check that supplied args are all valid. By definition logging is + working when we get here. + + Update the gp dictionary with all the useful info""" + gp['format'] = 'elf' + validate_build_dir(args) + gp['bd_supportdir'] = os.path.join(gp['bd'], 'support') + validate_baseline_dir(args) + gp['absolute'] = args.absolute + validate_output_format(args) + validate_metric(args) + validate_dummy_bm(args) + validate_file_ext(args) + if gp['output_format'] == output_format.BASELINE: + gp['absolute'] = True + + +def check_for_elf(appexe): + """Checked we have an ELF executable.""" + with open(appexe, 'rb') as fileh: + magic = fileh.read(4) + fileh.close() + if magic != b'\x7fELF': + log.info( + f'ERROR: Only ELF is supported, {appexe} does not contain magic identifier' + ) + sys.exit(1) + + +def benchmark_size(bench, bd_path, metrics, dummy_sec_sizes): + """Compute the total size of the desired sections in a benchmark. Returns + the size in bytes, which may be zero if the section wasn't found.""" + appexe = os.path.join(bd_path, bench, f"{bench}{gp['file_extension']}") + sec_sizes = {} + + # If the benchmark failed to build, then return a 0 size instead of + # crashing when failing to open the file. + if not os.path.exists(appexe): + return {} + + # read format from file and check it is as expected + check_for_elf(appexe) + + # TODO: We should insert the lief based anaysis here for use on Apple kit. 
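Until that lief-based path exists, the ELF route below relies on pyelftools and the flag/type table from the module docstring. A minimal stand-alone sketch of the same matching, assuming pyelftools is installed (the function names and the ELF path are illustrative, not part of the patch):

```python
# Illustration of the text/rodata/data/bss classification described in the
# module docstring; matches on the exact flag combination, as the script does.
from elftools.elf.elffile import ELFFile
from elftools.elf.constants import SH_FLAGS as F

CATEGORY_FLAGS = {
    'text':   ({F.SHF_ALLOC | F.SHF_EXECINSTR}, 'SHT_PROGBITS'),
    'rodata': ({F.SHF_ALLOC}, 'SHT_PROGBITS'),
    'data':   ({F.SHF_ALLOC | F.SHF_WRITE,
                F.SHF_ALLOC | F.SHF_WRITE | F.SHF_EXECINSTR}, 'SHT_PROGBITS'),
    'bss':    ({F.SHF_ALLOC | F.SHF_WRITE,
                F.SHF_ALLOC | F.SHF_WRITE | F.SHF_EXECINSTR}, 'SHT_NOBITS'),
}

def categorize_section(section):
    """Return the category a section belongs to, or None if it matches none."""
    for cat, (flag_sets, sh_type) in CATEGORY_FLAGS.items():
        if section['sh_type'] == sh_type and section['sh_flags'] in flag_sets:
            return cat
    return None

def section_sizes(elf_path):
    """Sum the section sizes of one ELF file per category."""
    sizes = {cat: 0 for cat in CATEGORY_FLAGS}
    with open(elf_path, 'rb') as fileh:
        for section in ELFFile(fileh).iter_sections():
            cat = categorize_section(section)
            if cat is not None:
                sizes[cat] += section['sh_size']
    return sizes
```

Because the comparison is an equality test rather than a bitwise mask, a section carrying extra flags (for example a mergeable string section) falls outside every category; that mirrors the behaviour of `benchmark_size` below.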
+ #binary = lief.parse(appexe) + + with open(appexe, 'rb') as fileh: + binary = elf.ELFFile(fileh) + for metric in metrics: + sec_sizes[metric] = 0 + sections = binary.iter_sections() + for section in sections: + metric_sh_flags_list = DEFAULT_FLAGS_ELF[metric][0] + metric_sh_type = DEFAULT_FLAGS_ELF[metric][1] + for metric_sh_flags in metric_sh_flags_list: + if ((section['sh_flags'] == metric_sh_flags) + and (section['sh_type'] == metric_sh_type)): + sec_sizes[metric] += section['sh_size'] + for metric, size in dummy_sec_sizes.items(): + if metric in metrics: + sec_sizes[metric] -= size + + # Return the section (group) size + return sec_sizes + + +def get_dummy_data(): + """Get the ELF section size data for the dummy benchmark and return it.""" + if isinstance(gp['dummy_benchmark'], str): + dummy_section_data = benchmark_size(gp['dummy_benchmark'], + gp['bd_supportdir'], ALL_METRICS, + {}) + else: + dummy_section_data = {} + if not dummy_section_data: + dummy_benchmark_abs_path = os.path.join(gp['bd_supportdir'], + gp['dummy_benchmark']) + log.error( + f'ERROR: could not find dummy benchmark at {dummy_benchmark_abs_path}' + ) + sys.exit(1) + return dummy_section_data + + +def output_json(benchmarks, raw_totals, rel_data): + """Output the results in JSON format.""" + log.info('{ "size results" :') + log.info(' { "detailed size results" :') + + for bench in benchmarks: + res_output = '' + if gp['absolute']: + res_output = f'{raw_totals[bench]}' + else: + res_output = f'{rel_data[bench]:.2f}' + + if bench == benchmarks[0]: + log.info(' { ' + f'"{bench}" : {res_output},') + elif bench == benchmarks[-1]: + log.info(f' "{bench}" : {res_output}') + else: + log.info(f' "{bench}" : {res_output},') + + if gp['absolute']: + log.info(' }') + log.info(' }') + log.info('}') + else: + log.info(' }') + log.info(' },') + + +def output_text(benchmarks, raw_totals, rel_data): + """Output the results in plain text format.""" + log.info('Benchmark size') + log.info('--------- ----') + + for bench in benchmarks: + res_output = '' + if gp['absolute']: + res_output = f' {raw_totals[bench]:8,}' + else: + res_output = f' {rel_data[bench]:6.2f}' + log.info(f'{bench:15} {res_output:8}') + + +def output_md(benchmarks, raw_totals, rel_data): + """Output the results in MarkDown format.""" + log.info('| Benchmark | Size |') + log.info('| :---------------- | -------: |') + + for bench in benchmarks: + res_output = '' + md_bench = '`' + bench + '`' + if gp['absolute']: + res_output = f'{raw_totals[bench]:8}' + else: + res_output = f'{rel_data[bench]:8.2f}' + log.info(f'| {md_bench:17} | {res_output:8} |') + + +def output_csv(benchmarks, raw_totals, rel_data): + """Output the results in CSV format.""" + log.info('"Benchmark","Size"') + + for bench in benchmarks: + res_output = '' + if gp['absolute']: + res_output = f'{raw_totals[bench]:0}' + else: + res_output = f'{rel_data[bench]:.2f}' + log.info(f'"{bench}","{res_output}"') + + +def output_baseline(benchmarks, raw_section_data): + """Output the results in suitable as baseline data.""" + log.info('{') + + for bench in benchmarks: + res_output = '' + for metric in ALL_METRICS: + # newline before the first metric + if metric != ALL_METRICS[0]: + res_output += ',\n' + value = raw_section_data[bench][metric] + res_output += f' "{metric}" : {value}' + + # comma after all but last benchmark in the log + if bench == benchmarks[-1]: + log.info(f' "{bench}" : {{\n{res_output}\n }}') + else: + log.info(f' "{bench}" : {{\n{res_output}\n }},') + + log.info('}') + + +def 
collect_data(benchmarks): + """Collect and log all the raw and optionally relative data associated with + the list of benchmarks supplied in the "benchmarks" argument. Return + the raw data and relative data as a list. The raw data may be empty if + there is a failure. The relative data will be empty if only absolute + results have been requested. + + Note that we manually generate the JSON output, rather than using the + dumps method, because the result will be manually edited, and we want + to guarantee the layout.""" + + # Baseline data is held external to the script. Import it here. + size_baseline = os.path.join(gp['baseline_dir'], 'size.json') + with open(size_baseline, "rb") as fileh: + baseline_all = loads(fileh.read()) + + # Compute the baseline data we need + baseline = {} + + for bench, data in baseline_all.items(): + baseline[bench] = 0 + for sec in gp['metric']: + baseline[bench] += data[sec] + + successful = True + raw_section_data = {} + raw_totals = {} + rel_data = {} + + # Collect dummy section sizes + dummy_section_data = get_dummy_data() + + # Measure each benchmark, subtracting the dummy section sizes + for bench in benchmarks: + if gp['output_format'] == output_format.BASELINE: + raw_section_data[bench] = benchmark_size(bench, gp['bd_benchdir'], + ALL_METRICS, + dummy_section_data) + else: + raw_section_data[bench] = benchmark_size(bench, gp['bd_benchdir'], + gp['metric'], + dummy_section_data) + raw_totals[bench] = sum(raw_section_data[bench].values()) + + # Calculate data relative to the baseline if needed + if gp['absolute'] or gp['output_format'] == output_format.BASELINE: + rel_data[bench] = {} + else: + # Want relative results (the default). If baseline is zero, just + # use 0.0 as the value. Note this is inverted compared to the + # speed benchmark, so SMALL is good. + if baseline[bench] > 0: + rel_data[bench] = raw_totals[bench] / baseline[bench] + else: + rel_data[bench] = 0.0 + + # Output it + if gp['output_format'] == output_format.JSON: + output_json(benchmarks, raw_totals, rel_data) + elif gp['output_format'] == output_format.TEXT: + output_text(benchmarks, raw_totals, rel_data) + elif gp['output_format'] == output_format.MD: + output_md(benchmarks, raw_totals, rel_data) + elif gp['output_format'] == output_format.CSV: + output_csv(benchmarks, raw_totals, rel_data) + elif gp['output_format'] == output_format.BASELINE: + output_baseline(benchmarks, raw_section_data) + + if successful: + return raw_totals, rel_data + + # Otherwise failure return + return [], [] + + +def output_stats_json(geomean, geosd, georange): + """Output the stats in JSON format.""" + log.info(f' "geomean" : {geomean:.2f},') + log.info(f' "geosd" : {geosd:.2f},') + log.info(f' "georange" : {georange:.2f}') + + log.info('}') + + +def output_stats_text(geomean, geosd, georange): + """Output the stats in plain text format.""" + log.info('--------------- --------') + log.info(f'Geometric mean {geomean:8.2f}') + log.info(f'Geometric s.d. {geosd:8.2f}') + log.info(f'Geometric range {georange:8.2f}') + + +def output_stats_md(geomean, geosd, georange): + """Output the stats in MarkDown format.""" + log.info('| | |') + log.info(f'| Geometric mean | {geomean:8.2f} |') + log.info(f'| Geometric s.d. 
| {geosd:8.2f} |') + log.info(f'| Geometric range | {georange:8.2f} |') + + +def output_stats_csv(geomean, geosd, georange): + """Output the stats in CSV format.""" + log.info('"",""') + log.info(f'"Geometric mean","{geomean:.2f}"') + log.info(f'"Geometric s.d.","{geosd:.2f}"') + log.info(f'"Geometric range","{georange:.2f}"') + + +def main(): + """Main program driving measurement of benchmark size""" + # Establish the root directory of the repository, since we know this file is + # in that directory. + gp['rootdir'] = os.path.abspath(os.path.dirname(__file__)) + + # Parse arguments using standard technology + parser = build_parser() + args = parser.parse_args() + + # Establish logging + setup_logging(args.logdir, 'size') + log_args(args) + + # Check args are OK (have to have logging and build directory set up first) + validate_args(args) + + # Find the benchmarks + benchmarks = find_benchmarks() + log_benchmarks(benchmarks) + + # Collect the size data for the benchmarks + raw_data, rel_data = collect_data(benchmarks) + # We can't compute geometric SD on the fly, so we need to collect all the + # data and then process it in two passes. We could do the first processing + # as we collect the data, but it is clearer to do the three things + # separately. Given the size of datasets with which we are concerned the + # compute overhead is not significant. + if raw_data: + if not gp['absolute']: + geomean, geosd, georange = embench_stats(benchmarks, raw_data, + rel_data) + if gp['output_format'] == output_format.JSON: + output_stats_json(geomean, geosd, georange) + elif gp['output_format'] == output_format.TEXT: + output_stats_text(geomean, geosd, georange) + elif gp['output_format'] == output_format.MD: + output_stats_md(geomean, geosd, georange) + elif gp['output_format'] == output_format.CSV: + output_stats_csv(geomean, geosd, georange) + else: + log.info('ERROR: Failed to compute size benchmarks') + sys.exit(1) + + +# Make sure we have new enough Python and only run if this is the main package + +check_python_version(3, 6) +if __name__ == '__main__': + sys.exit(main()) diff --git a/benchmark_speed.py b/benchmark_speed.py new file mode 100755 index 0000000..be9f5ce --- /dev/null +++ b/benchmark_speed.py @@ -0,0 +1,554 @@ +#!/usr/bin/env python3 + +# Script to benchmark execution speed. + +# Copyright (C) 2017, 2019 Embecosm Limited +# +# Contributor: Graham Markall +# Contributor: Jeremy Bennett +# Contributor: Konrad Moreon +# +# This file is part of Embench. + +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Benchmark speed. + +This version is suitable when using a version of GDB which can launch a GDB +server to use as a target. 
+""" + +import argparse +import importlib +import os +import sys +import platform + +from json import loads + +sys.path.append( + os.path.join(os.path.abspath(os.path.dirname(__file__)), 'pylib') +) + +from embench_core import check_python_version +from embench_core import log +from embench_core import gp +from embench_core import setup_logging +from embench_core import log_args +from embench_core import find_benchmarks +from embench_core import log_benchmarks +from embench_core import embench_stats +from embench_core import output_format + + +def get_common_args(): + """Build a parser for all the arguments""" + parser = argparse.ArgumentParser(description='Compute the size benchmark') + + parser.add_argument( + '--builddir', + type=str, + default='bd', + help='Directory holding all the binaries', + ) + parser.add_argument( + '--logdir', + type=str, + default='logs', + help='Directory in which to store logs', + ) + parser.add_argument( + '--baselinedir', + type=str, + default='baseline-data', + help='Directory which contains baseline data', + ) + parser.add_argument( + '--absolute', + action='store_true', + help='Specify to show absolute results', + ) + parser.add_argument( + '--relative', + dest='absolute', + action='store_false', + help='Specify to show relative results (the default)', + ) + parser.add_argument( + '--json-output', + dest='output_format', + action='store_const', + const=output_format.JSON, + help='Specify to output in JSON format', + ) + parser.add_argument( + '--text-output', + dest='output_format', + action='store_const', + const=output_format.TEXT, + help='Specify to output as plain text (the default)', + ) + parser.add_argument( + '--md-output', + dest='output_format', + action='store_const', + const=output_format.MD, + help='Specify to output as Markdown', + ) + parser.add_argument( + '--csv-output', + dest='output_format', + action='store_const', + const=output_format.CSV, + help='Specify to output as CSV', + ) + parser.add_argument( + '--baseline-output', + dest='output_format', + action='store_const', + const=output_format.BASELINE, + help='Specify to output in a format suitable for use as a baseline' + ) + parser.add_argument( + '--json-comma', + action='store_true', + help='Specify to append a comma to the JSON output', + ) + parser.add_argument( + '--no-json-comma', + dest='json_comma', + action='store_false', + help='Specify to not append a comma to the JSON output', + ) + parser.add_argument( + '--target-module', + type=str, + required=True, + help='Python module with routines to run benchmarks', + ) + parser.add_argument( + '--timeout', + type=int, + default=30, + help='Timeout used for running each benchmark program' + ) + parser.add_argument( + '--file-extension', + type=str, + default=None, + help='Optional file extension to append to bench mark names when searching for binaries.' + ) + parser.add_argument( + '--gsf', + type=int, + default=1, + help='Global scale factor for benchmarks' + ) + parser.add_argument( + '--cpu-mhz', + type=int, + default=16, + help='Processor clock speed in MHz' + ) + + return parser.parse_known_args() + + +def validate_args(args): + """Check that supplied args are all valid. By definition logging is + working when we get here. 
+ + Update the gp dictionary with all the useful info""" + gp['bd'] = args.builddir if os.path.isabs(args.builddir) else os.path.join(gp['rootdir'], args.builddir) + + if not os.path.isdir(gp['bd']): + log.error(f'ERROR: build directory {gp["bd"]} not found: exiting') + sys.exit(1) + + if not os.access(gp['bd'], os.R_OK): + log.error(f'ERROR: Unable to read build directory {gp["bd"]}: exiting') + sys.exit(1) + + gp['baseline_dir'] = args.baselinedir if os.path.isabs(args.baselinedir) else os.path.join(gp['rootdir'], args.baselinedir) + + gp['absolute'] = args.absolute + if args.output_format: + gp['output_format'] = args.output_format + else: + gp['output_format'] = output_format.TEXT + + gp['timeout'] = args.timeout + + if args.file_extension is None: + gp['file_extension'] = '.exe' if platform.system() == 'Windows' else '' + else: + gp['file_extension'] = args.file_extension + + try: + newmodule = importlib.import_module(args.target_module) + except ImportError as error: + log.error( + f'ERROR: Target module import failure: {error}: exiting' + ) + sys.exit(1) + + globals()['get_target_args'] = newmodule.get_target_args + globals()['run_benchmark'] = newmodule.run_benchmark + + +def benchmark_speed(bench, args): + """Time the benchmark. "args" is a namespace of arguments, including + those specific to the target. Result is a time in milliseconds, or + zero on failure.""" + appdir = os.path.join(gp['bd_benchdir'], bench) + appexe = os.path.join(appdir,f"{bench}{gp['file_extension']}") + + res = None + if os.path.isfile(appexe): + res = run_benchmark(bench, appexe, args) + if res is None: + log.warning(f'Warning: Run of {bench} failed.') + else: + log.warning(f'Warning: {bench} executable not found.') + + if res is None: + print ('failed') + return 0 + return res + +def run_benchmarks(benchmarks, args): + """Run the benchmarks, recording the raw times. + + return a flag indicating success, a list of the benchmarks run + successfully and the raw data as a dictionary. Only benchmarks for + which we suceeded will have an entry.""" + successful = True + benchmarks_run = [] + raw_data = {} + + # Run the benchmarks + for bench in benchmarks: + raw_data[bench] = float(benchmark_speed(bench, args)) + + # Delete the benchmark if it didn't succeed, record it if it did. + for bench in benchmarks: + if raw_data[bench] == 0.0: + del raw_data[bench] + successful = False + else: + benchmarks_run.append(bench) + raw_data[bench] = float(raw_data[bench]) + + return successful, benchmarks_run, raw_data + +def compute_rel(benchmarks_run, raw_data, args): + """Generate relative speed data. Return a dictionary of relative + scores. In this case, we need to scale the raw scores by the scaling + factor""" + rel_data = {} + + # Get the baseline data + speed_baseline = os.path.join(gp['baseline_dir'], 'speed.json') + with open(speed_baseline) as fileh: + baseline = loads(fileh.read()) + + # We know there must be data + for bench in benchmarks_run: + rel_data[bench] = baseline[bench] / raw_data[bench] * args.gsf + + return rel_data + +def output_json(benchmarks_run, raw_data, rel_data, args): + """Output the data table in a JSON format. 
We are given a list of + benchmarks for which we have data""" + log.info('{ "speed results" :') + log.info(' { "detailed speed results" :') + + for bench in benchmarks_run: + output = f'{round(raw_data[bench])}' if args.absolute else f'{rel_data[bench]:.2f}' + + if bench == benchmarks_run[0]: + log.info(f' {{ "{bench}" : {output},') + elif bench == benchmarks_run[-1]: + log.info(f' "{bench}" : {output}') + else: + log.info(f' "{bench}" : {output},') + log.info(' },') + +def output_text (benchmarks_run, raw_data, rel_data, args): + """Output the data table in plain text format. We are given a list of + benchmarks for which we have data""" + if gp['absolute']: + log.info('Benchmark Speed') + log.info('--------- -----') + else: + log.info('Benchmark Speed Speed/MHz') + log.info('--------- ----- ---------') + + for bench in benchmarks_run: + if gp['absolute']: + output = f'{round(raw_data[bench]):8,}' + log.info(f'{bench:15} {output:8}') + else: + rel_per_mhz = rel_data[bench] / args.cpu_mhz + output1 = f' {rel_data[bench]:6.2f}' + output2 = f' {rel_per_mhz:6.2f}' + log.info(f'{bench:15} {output1:8} {output2:8}') + +def output_md (benchmarks_run, raw_data, rel_data, args): + """Output the data table in Markdown format. We are given a list of + benchmarks for which we have data""" + if gp['absolute']: + log.info('| Benchmark | Speed |') + log.info('| :-------------- | ---------: |') + else: + log.info('| Benchmark | Speed | Speed/MHz |') + log.info('| :-------------- | ---------: | ---------: |') + + for bench in benchmarks_run: + if gp['absolute']: + output = f'{round(raw_data[bench]):8,}' + log.info(f'| {bench:15} | {output:8} |') + else: + rel_per_mhz = rel_data[bench] / args.cpu_mhz + output1 = f' {rel_data[bench]:6.2f}' + output2 = f' {rel_per_mhz:6.2f}' + log.info(f'| {bench:15} | {output1:8} | {output2:8} |') + +def output_csv (benchmarks_run, raw_data, rel_data, args): + """Output the data table in CSV format. We are given a list of + benchmarks for which we have data""" + if gp['absolute']: + log.info('"Benchmark","Speed"') + else: + log.info('"Benchmark","Speed","Speed/MHz"') + + for bench in benchmarks_run: + if gp['absolute']: + log.info(f'"{bench}","{round(raw_data[bench])}"') + else: + rel_per_mhz = rel_data[bench] / args.cpu_mhz + log.info(f'"{bench}","{rel_data[bench]:.2f}","{rel_per_mhz:.2f}"') + +def output_baseline(benchmarks_run, raw_data): + """Output the data table in a JSON format for use as the baseline table. + We are given a list of benchmarks for which we have data""" + log.info('{') + for bench in benchmarks_run: + if bench == benchmarks_run[-1]: + log.info(f' "{bench}" : {round(raw_data[bench]):0}') + else: + log.info(f' "{bench}" : {round(raw_data[bench]):0},') + + log.info('}') + +def collect_data(benchmarks, args): + """Collect and log all the raw and optionally relative data associated with + the list of benchmarks supplied in the "benchmarks" argument. "remant" + is left over args from the command line, which may be useful to the + benchmark running procs. + + Return the raw data and relative data as a list. The raw data may be + empty if there is a failure. The relative data will be empty if only + absolute results have been requested.""" + + # Get the raw data + successful, benchmarks_run, raw_data = run_benchmarks(benchmarks, args) + + # Baseline data is held external to the script. 
Import it here if we are + # doing relative output and then generate the relative data + if not gp['absolute']: + rel_data = compute_rel(benchmarks, raw_data, args) + else: + rel_data = {} + + # Output it + if gp['output_format'] == output_format.JSON: + output_json (benchmarks_run, raw_data, rel_data, args) + elif gp['output_format'] == output_format.TEXT: + output_text (benchmarks_run, raw_data, rel_data, args) + elif gp['output_format'] == output_format.MD: + output_md (benchmarks_run, raw_data, rel_data, args) + elif gp['output_format'] == output_format.CSV: + output_csv (benchmarks_run, raw_data, rel_data, args) + elif gp['output_format'] == output_format.BASELINE: + output_baseline(benchmarks_run, raw_data) + + if successful: + return raw_data, rel_data + + # Otherwise failure return + return [], [] + + +def output_stats_json(geomean, geosd, georange, args): + """Output the statistical summary in JSON format. + + Note that we manually generate the JSON output, rather than using the + dumps method, because the result will be manually edited, and we want + to guarantee the layout.""" + + opt_comma = ',' if args.json_comma else '' + if gp['absolute']: + geomean_op = f'{int(geomean):0,}' + geosd_op = f'{geosd:.2f}' + georange_op = f'{int(georange):0,}' + else: + geomean_op = f'{geomean:.2f}' + geosd_op = f'{geosd:.2f}' + georange_op = f'{georange:.2f}' + + # Output the results + log.info(f' "speed geometric mean" : {geomean_op},') + log.info(f' "speed geometric standard deviation" : {geosd_op}') + log.info(f' "speed geometric range" : {georange_op}') + log.info(' }' + f'{opt_comma}') + + +def output_stats_text(geomean, geosd, georange, args): + """Output the statistical summary in plain text format.""" + + if gp['absolute']: + geomean_op = f'{int(geomean):8,}' + geosd_op = f' {geosd:6.2f}' + georange_op = f'{int(georange):8,}' + else: + geomean_mhz = geomean / float(args.cpu_mhz) + georange_mhz = georange / float(args.cpu_mhz) + geomean_op = f' {geomean:6.2f}' + geosd_op = f' {geosd:6.2f}' + georange_op = f' {georange:6.2f}' + geomean_mhz_op = f' {geomean_mhz:6.2f}' + geosd_mhz_op = f' {geosd:6.2f}' + georange_mhz_op = f' {georange_mhz:6.2f}' + + # Output the results + if gp['absolute']: + log.info('--------- -----') + log.info(f'Geometric mean {geomean_op}') + log.info(f'Geometric SD {geosd_op}') + log.info(f'Geometric range {georange_op}') + else: + log.info('--------- ----- ---------') + log.info(f'Geometric mean {geomean_op} {geomean_mhz_op}') + log.info(f'Geometric SD {geosd_op} {geosd_mhz_op}') + log.info(f'Geometric range {georange_op} {georange_mhz_op}') + + log.info('All benchmarks run successfully') + +def output_stats_md(geomean, geosd, georange, args): + """Output the statistical summary in Markdown format.""" + + if gp['absolute']: + geomean_op = f'{int(geomean):8,}' + geosd_op = f' {geosd:6.2f}' + georange_op = f'{int(georange):8,}' + else: + geomean_mhz = geomean / float(args.cpu_mhz) + georange_mhz = georange / float(args.cpu_mhz) + geomean_op = f' {geomean:6.2f}' + geosd_op = f' {geosd:6.2f}' + georange_op = f' {georange:6.2f}' + geomean_mhz_op = f' {geomean_mhz:6.2f}' + geosd_mhz_op = f' {geosd:6.2f}' + georange_mhz_op = f' {georange_mhz:6.2f}' + + # Output the results + if gp['absolute']: + log.info('| | |') + log.info(f'| Geometric mean | {geomean_op} |') + log.info(f'| Geometric SD | {geosd_op} |') + log.info(f'| Geometric range | {georange_op} |') + else: + log.info('| | | |') + log.info(f'| Geometric mean | {geomean_op} | {geomean_mhz_op} |') + log.info(f'| 
Geometric SD | {geosd_op} | {geosd_mhz_op} |') + log.info(f'| Geometric range | {georange_op} | {georange_mhz_op} |') + +def output_stats_csv(geomean, geosd, georange, args): + """Output the statistical summary in CSV format.""" + + if gp['absolute']: + geomean_op = f'{int(geomean)}' + geosd_op = f'{geosd:.2f}' + georange_op = f'{int(georange)}' + else: + geomean_mhz = geomean / float(args.cpu_mhz) + georange_mhz = georange / float(args.cpu_mhz) + geomean_op = f'{geomean:.2f}' + geosd_op = f'{geosd:.2f}' + georange_op = f'{georange:.2f}' + geomean_mhz_op = f'{geomean_mhz:.2f}' + geosd_mhz_op = f'{geosd:.2f}' + georange_mhz_op = f'{georange_mhz:.2f}' + + # Output the results + if gp['absolute']: + log.info(f'"Geometric mean","{geomean_op}"') + log.info(f'"Geometric SD","{geosd_op}"') + log.info(f'"Geometric range","{georange_op}"') + else: + log.info(f'"Geometric mean","{geomean_op}","{geomean_mhz_op}"') + log.info(f'"Geometric SD","{geosd_op}","{geosd_mhz_op}"') + log.info(f'"Geometric range","{georange_op}","{georange_mhz_op}"') + +def generate_stats(benchmarks, raw_data, rel_data, args): + """Generate the summary statistics at the end. This is only computed when + we have a successful run, so we know all benchmarks are represented.""" + if gp['output_format'] != output_format.BASELINE: + geomean, geosd, georange = embench_stats(benchmarks, raw_data, rel_data) + + if gp['output_format'] == output_format.JSON: + output_stats_json (geomean, geosd, georange, args) + elif gp['output_format'] == output_format.TEXT: + output_stats_text (geomean, geosd, georange, args) + elif gp['output_format'] == output_format.MD: + output_stats_md (geomean, geosd, georange, args) + elif gp['output_format'] == output_format.CSV: + output_stats_csv (geomean, geosd, georange, args) + +def main(): + """Main program driving measurement of benchmark size""" + # Establish the root directory of the repository, since we know this file is + # in that directory. + gp['rootdir'] = os.path.abspath(os.path.dirname(__file__)) + + # Parse arguments common to all speed testers, and get list of those + # remaining. + args, remnant = get_common_args() + + # Establish logging + setup_logging(args.logdir, 'speed') + log_args(args) + + # Check args are OK (have to have logging and build directory set up first) + validate_args(args) + + # Parse target specific args + args = argparse.Namespace(**vars(args), **vars(get_target_args(remnant))) + + # Find the benchmarks + benchmarks = find_benchmarks() + log_benchmarks(benchmarks) + + # Collect the speed data for the benchmarks. + raw_data, rel_data = collect_data(benchmarks, args) + + # We can't compute geometric SD on the fly, so we need to collect all the + # data and then process it in two passes. We could do the first processing + # as we collect the data, but it is clearer to do the three things + # separately. Given the size of datasets with which we are concerned the + # compute overhead is not significant. 
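For reference, the statistics produced by `embench_stats` boil down to a geometric mean, a geometric standard deviation and the range spanned by one s.d. around the mean. A condensed sketch of that calculation over a dictionary of relative scores (the scores shown are made-up sample values, not measured data):

```python
# Condensed form of compute_geomean / compute_geosd / compute_georange from
# pylib/embench_core.py, applied to made-up relative scores.
import math

scores = {'rfft512_f32': 1.04, 'fir_f32_taps256_n128': 0.97, 'dct4_512_f32': 1.12}
vals = [v for v in scores.values() if v > 0]   # zero marks a failed run

geomean = 1.0
for val in vals:
    geomean *= val
geomean = pow(geomean, 1.0 / len(vals))

lnsize = sum(math.pow(math.log(val / geomean), 2) for val in vals)
geosd = math.exp(math.sqrt(lnsize / len(vals)))
georange = geomean * geosd - geomean / geosd   # one s.d. either side of the mean

print(f'geomean {geomean:.2f}  geosd {geosd:.2f}  georange {georange:.2f}')
```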
+ if raw_data: + generate_stats(benchmarks, raw_data, rel_data, args) + else: + log.info('ERROR: Failed to compute speed benchmarks') + sys.exit(1) + + +# Make sure we have new enough Python and only run if this is the main package + +check_python_version(3, 6) +if __name__ == '__main__': + sys.exit(main()) diff --git a/dsp_inc.mk b/dsp_inc.mk deleted file mode 100644 index 8e8b128..0000000 --- a/dsp_inc.mk +++ /dev/null @@ -1,20 +0,0 @@ - -DSP_DIR =$(ROOT) - -############################################################## -# EXTERNAL -############################################################## - - -############################################################## -# DEFINES -############################################################## - -DSP_DEF += - - -############################################################## -# INCLUDES -############################################################## - -DSP_INC +=-I $(DSP_DIR)/include diff --git a/examples/arm/stm32f4discovery/.gitignore b/examples/arm/stm32f4discovery/.gitignore new file mode 100644 index 0000000..151a620 --- /dev/null +++ b/examples/arm/stm32f4discovery/.gitignore @@ -0,0 +1,2 @@ +*.o +*.a \ No newline at end of file diff --git a/targets/stm32f4discovery/target.mk b/examples/arm/stm32f4discovery/Makefile similarity index 66% rename from targets/stm32f4discovery/target.mk rename to examples/arm/stm32f4discovery/Makefile index de1aaa0..a26977f 100644 --- a/targets/stm32f4discovery/target.mk +++ b/examples/arm/stm32f4discovery/Makefile @@ -30,14 +30,21 @@ TGT_DEF +=-DSTM32F407xx TGT_INC +=-I $(TGT_DIR)/.. -I $(TGT_DIR) ### FLAGS -TGT_FLG +=$(TGT_ISA) $(TGT_DEF) -TGT_LD +=-T $(TGT_DIR)/link.ld +TGT_FLG +=$(TGT_ISA) $(TGT_DEF) -Os -### SOURCES -TGT_SRC +=$(TGT_DIR)/boardsupport.c -TGT_SRC +=$(TGT_DIR)/startup.S -TGT_SRC +=$(TGT_DIR)/syscalls.c -TGT_SRC +=$(TGT_DIR)/system_stm32f4.c +libstartup.a: startup.o system_stm32f4.o syscalls.o + arm-none-eabi-ar rcs $@ $^ -### OBJECTS -TGT_OBJ +=$(patsubst %.c,%.o, $(patsubst %.S,%.o,$(notdir $(TGT_SRC)))) +startup.o: startup.S + arm-none-eabi-gcc -c -o $@ $^ $(TGT_FLG) + +syscalls.o: syscalls.c + arm-none-eabi-gcc -c -o $@ $^ $(TGT_FLG) + +system_stm32f4.o: system_stm32f4.c + arm-none-eabi-gcc -c -o $@ $^ $(TGT_FLG) + +clean: + rm -rf libstartup.a *.o + +.PHONY: clean \ No newline at end of file diff --git a/examples/arm/stm32f4discovery/README.md b/examples/arm/stm32f4discovery/README.md new file mode 100644 index 0000000..c819618 --- /dev/null +++ b/examples/arm/stm32f4discovery/README.md @@ -0,0 +1,62 @@ + +## Building the Arm support library + +This only need to be done once. It is not ideal, but this library is always built in tree. + +```sh +pushd examples/arm/stm32f4discovery +make +popd +``` + +## Building for speed + +If you haven't already done it, build the Arm support library (see above) + +Then you can build the benchmarks. The compilation options here are those used for the baseline Embench 2.0 results. 
+```sh +cflags="-O2 -ffunction-sections -fdata-sections -mcpu=cortex-m4 \ + -mfloat-abi=hard -mthumb -mfpu=fpv4-sp-d16" +ldflags="-O2 -Wl,--gc-sections -mcpu=cortex-m4 -mfloat-abi=hard \ + -mthumb \ + -T\${CONFIG_DIR}/link.ld -L\${CONFIG_DIR} \ + -static -nostartfiles --specs=nosys.specs" +scons --config-dir=examples/arm/stm32f4discovery/ \ + --build-dir=bd-arm-gcc-14.0.1-speed \ + cc=arm-none-eabi-gcc cflags="${cflags}" \ + ldflags="${ldflags}" user_libs='m c startup' gsf=1 +``` + +## Building for size + +If you haven't already done it, build the Arm support library (see above) + +Then you can build the benchmarks. The compilation options here are those used for the baseline Embench 2.0 results. + +```sh +cflags="-Os -ffunction-sections -fdata-sections -mcpu=cortex-m4 \ + -mfloat-abi=hard -mthumb -mfpu=fpv4-sp-d16" +ldflags="-Os -Wl,--gc-sections -mcpu=cortex-m4 -mfloat-abi=hard \ + -mthumb \ + -T\${CONFIG_DIR}/link.ld -L\${CONFIG_DIR} \ + -static -nostartfiles --specs=nosys.specs" +scons --config-dir=examples/arm/stm32f4discovery/ \ + --build-dir=bd-arm-gcc-14.0.1-size \ + cc=arm-none-eabi-gcc cflags="${cflags}" \ + ldflags="${ldflags}" user_libs='m c startup' gsf=1 +``` +Note that a global scale factor of 1 is always used for code size runs. + +## Measuring speed + +```sh +./benchmark_speed.py --builddir bd-arm-gcc-14.0.1-speed \ + --target-module=run_stm32f4-discovery \ + --gdb-command=gdb-multiarch --gsf=1 --cpu-mhz=16 +``` + +## Measuring size + +```sh +./benchmark_size.py --builddir bd-arm-gcc-14.0.1-size +``` diff --git a/targets/stm32f4discovery/boardsupport.c b/examples/arm/stm32f4discovery/boardsupport.c similarity index 100% rename from targets/stm32f4discovery/boardsupport.c rename to examples/arm/stm32f4discovery/boardsupport.c diff --git a/examples/arm/stm32f4discovery/boardsupport.h b/examples/arm/stm32f4discovery/boardsupport.h new file mode 100644 index 0000000..0fd66cc --- /dev/null +++ b/examples/arm/stm32f4discovery/boardsupport.h @@ -0,0 +1,15 @@ +/* Copyright (C) 2017 Embecosm Limited and University of Bristol + + Contributor Graham Markall + + This file is part of Embench and was formerly part of the Bristol/Embecosm + Embedded Benchmark Suite. 
+ + SPDX-License-Identifier: GPL-3.0-or-later */ + +#ifndef _BOARDSUPPORT_H_ +#define _BOARDSUPPORT_H_ +#define STM32F407xx +#include "support.h" + +#endif // _BOARDSUPPORT_H_ diff --git a/targets/stm32f4discovery/cmsis_compiler.h b/examples/arm/stm32f4discovery/cmsis_compiler.h similarity index 100% rename from targets/stm32f4discovery/cmsis_compiler.h rename to examples/arm/stm32f4discovery/cmsis_compiler.h diff --git a/targets/stm32f4discovery/cmsis_gcc.h b/examples/arm/stm32f4discovery/cmsis_gcc.h similarity index 100% rename from targets/stm32f4discovery/cmsis_gcc.h rename to examples/arm/stm32f4discovery/cmsis_gcc.h diff --git a/targets/stm32f4discovery/cmsis_version.h b/examples/arm/stm32f4discovery/cmsis_version.h similarity index 100% rename from targets/stm32f4discovery/cmsis_version.h rename to examples/arm/stm32f4discovery/cmsis_version.h diff --git a/targets/stm32f4discovery/core_cm4.h b/examples/arm/stm32f4discovery/core_cm4.h similarity index 100% rename from targets/stm32f4discovery/core_cm4.h rename to examples/arm/stm32f4discovery/core_cm4.h diff --git a/targets/stm32f4discovery/link.ld b/examples/arm/stm32f4discovery/link.ld similarity index 100% rename from targets/stm32f4discovery/link.ld rename to examples/arm/stm32f4discovery/link.ld diff --git a/targets/stm32f4discovery/mpu_armv7.h b/examples/arm/stm32f4discovery/mpu_armv7.h similarity index 100% rename from targets/stm32f4discovery/mpu_armv7.h rename to examples/arm/stm32f4discovery/mpu_armv7.h diff --git a/targets/stm32f4discovery/startup.S b/examples/arm/stm32f4discovery/startup.S similarity index 99% rename from targets/stm32f4discovery/startup.S rename to examples/arm/stm32f4discovery/startup.S index cc5f27e..2b511c6 100644 --- a/targets/stm32f4discovery/startup.S +++ b/examples/arm/stm32f4discovery/startup.S @@ -98,6 +98,7 @@ LoopFillZerobss: bl __libc_init_array /* Call the application's entry point.*/ bl main +AtExit: bx lr .size Reset_Handler, .-Reset_Handler diff --git a/targets/stm32f4discovery/stm32f407xx.h b/examples/arm/stm32f4discovery/stm32f407xx.h similarity index 100% rename from targets/stm32f4discovery/stm32f407xx.h rename to examples/arm/stm32f4discovery/stm32f407xx.h diff --git a/targets/stm32f4discovery/stm32f4xx.h b/examples/arm/stm32f4discovery/stm32f4xx.h similarity index 100% rename from targets/stm32f4discovery/stm32f4xx.h rename to examples/arm/stm32f4discovery/stm32f4xx.h diff --git a/targets/stm32f4discovery/syscalls.c b/examples/arm/stm32f4discovery/syscalls.c similarity index 98% rename from targets/stm32f4discovery/syscalls.c rename to examples/arm/stm32f4discovery/syscalls.c index a339058..7c1ff9f 100644 --- a/targets/stm32f4discovery/syscalls.c +++ b/examples/arm/stm32f4discovery/syscalls.c @@ -42,10 +42,6 @@ extern int __io_getchar(void) __attribute__((weak)); static uint8_t *__sbrk_heap_end = NULL; -char *__env[1] = { 0 }; -char **environ = __env; - - /* Functions */ int __io_putchar(int ch) { diff --git a/targets/stm32f4discovery/system_stm32f4.c b/examples/arm/stm32f4discovery/system_stm32f4.c similarity index 98% rename from targets/stm32f4discovery/system_stm32f4.c rename to examples/arm/stm32f4discovery/system_stm32f4.c index ea145ec..d3cc43a 100644 --- a/targets/stm32f4discovery/system_stm32f4.c +++ b/examples/arm/stm32f4discovery/system_stm32f4.c @@ -50,7 +50,8 @@ void SystemInit(void) SCB->CPACR |= ((3UL << 10*2)|(3UL << 11*2)); /* set CP10 and CP11 Full Access */ #endif } - +void _init(void) {SystemInit();} +void _fini(void) {} 
/******************************************************************************/ /* Cortex-M4 Processor Interruption and Exception Handlers */ /******************************************************************************/ diff --git a/examples/native/README.md b/examples/native/README.md new file mode 100644 index 0000000..291c5f1 --- /dev/null +++ b/examples/native/README.md @@ -0,0 +1,25 @@ +# Configuration + +## Building Speed + +```sh +scons --config-dir=examples/native/ cflags="-O2 -fdata-sections -ffunction-sections" ldflags="-O2 -Wl,-gc-sections" user_libs=-lm +``` + +## Building Size + +```sh +scons --config-dir=examples/native/ cflags="-Os -fdata-sections -ffunction-sections" ldflags="-Os -rdynamic -Wl,-gc-sections" user_libs=-lm +``` + +## Measuring speed + +```sh +./benchmark_speed.py --target-module=run_stm32f4-discovery --gsf=1 --cpu-mhz=16 +``` + +## Measuring size + +```sh +./benchmark_size.py +``` diff --git a/targets/native/boardsupport.c b/examples/native/boardsupport.c similarity index 100% rename from targets/native/boardsupport.c rename to examples/native/boardsupport.c diff --git a/examples/native/boardsupport.h b/examples/native/boardsupport.h new file mode 100644 index 0000000..ddb98e2 --- /dev/null +++ b/examples/native/boardsupport.h @@ -0,0 +1,15 @@ +/* Copyright (C) 2017 Embecosm Limited and University of Bristol + + Contributor Graham Markall + + This file is part of Embench and was formerly part of the Bristol/Embecosm + Embedded Benchmark Suite. + + SPDX-License-Identifier: GPL-3.0-or-later */ + +#ifndef _BOARDSUPPORT_H_ +#define _BOARDSUPPORT_H_ + +#include "support.h" + +#endif // _BOARDSUPPORT_H_ diff --git a/pylib/embench_core.py b/pylib/embench_core.py new file mode 100644 index 0000000..781c5cb --- /dev/null +++ b/pylib/embench_core.py @@ -0,0 +1,246 @@ +#!/usr/bin/env python3 + +# Common python procedures for use across Embench. + +# Copyright (C) 2017, 2019 Embecosm Limited +# +# Contributor: Graham Markall +# Contributor: Jeremy Bennett +# +# This file is part of Embench. + +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Embench common procedures. + +This version is suitable when using a version of GDB which can launch a GDB +server to use as a target. 
+""" + +import logging +import math +import os +import re +import sys +import time +from enum import Enum + + +# What we export + +__all__ = [ + 'check_python_version', + 'log', + 'gp', + 'output_format', + 'setup_logging', + 'log_args', + 'log_benchmarks', + 'embench_stats', + 'arglist_to_str', +] + +# Handle for the logger +log = logging.getLogger() + +# All the global parameters +gp = dict() + +# Different formats of output that can be generated by the benchmarking scripts +class output_format(Enum): + JSON = 1 + TEXT = 2 + MD = 3 + CSV = 4 + BASELINE = 5 + + +# Make sure we have new enough python +def check_python_version(major, minor): + """Check the python version is at least {major}.{minor}.""" + if ((sys.version_info[0] < major) + or ((sys.version_info[0] == major) and (sys.version_info[1] < minor))): + log.error('ERROR: Requires Python {mjr}.{mnr} or later'.format(mjr=major, mnr=minor)) + sys.exit(1) + + +def create_logdir(logdir): + """Create the log directory, which can be relative to the root directory + or absolute""" + if not os.path.isabs(logdir): + logdir = os.path.join(gp['rootdir'], logdir) + + if not os.path.isdir(logdir): + try: + os.makedirs(logdir) + except PermissionError: + raise PermissionError('Unable to create log directory {dir}'.format(dir=logdir)) + + if not os.access(logdir, os.W_OK): + raise PermissionError('Unable to write to log directory {dir}'.format(dir=logdir)) + + return logdir + + +def setup_logging(logdir, prefix): + """Set up logging in the directory specified by "logdir". + + The log file name is the "prefix" argument followed by a timestamp. + + Debug messages only go to file, everything else also goes to the + console.""" + + # Create the log directory first if necessary. + logdir_abs = create_logdir(logdir) + logfile = os.path.join( + logdir_abs, time.strftime('{pref}-%Y-%m-%d-%H%M%S.log'.format(pref=prefix)) + ) + + # Set up logging + log.setLevel(logging.DEBUG) + cons_h = logging.StreamHandler(sys.stdout) + cons_h.setLevel(logging.INFO) + log.addHandler(cons_h) + file_h = logging.FileHandler(logfile) + file_h.setLevel(logging.DEBUG) + log.addHandler(file_h) + + # Log where the log file is + log.debug('Log file: {log}\n'.format(log=logfile)) + log.debug('') + + +def log_args(args): + """Record all the argument values""" + log.debug('Supplied arguments') + log.debug('==================') + + for arg in vars(args): + realarg = re.sub('_', '-', arg) + val = getattr(args, arg) + log.debug('--{arg:20}: {val}'.format(arg=realarg, val=val)) + + log.debug('') + + +def find_benchmarks(): + """Enumerate all the benchmarks in alphabetical order. The benchmarks are + found in the 'src' subdirectory of the root directory. Set up global + parameters for the source and build benchmark directories. + + Return the list of benchmarks.""" + gp['benchdir'] = os.path.join(gp['rootdir'], 'src') + gp['bd_benchdir'] = os.path.join(gp['bd'], 'src') + dirlist = os.listdir(gp['benchdir']) + + benchmarks = [] + + for bench in dirlist: + abs_b = os.path.join(gp['benchdir'], bench) + if os.path.isdir(abs_b): + benchmarks.append(bench) + + benchmarks.sort() + + return benchmarks + + +def log_benchmarks(benchmarks): + """Record all the benchmarks in the log""" + log.debug('Benchmarks') + log.debug('==========') + + for bench in benchmarks: + log.debug(bench) + + log.debug('') + + +def compute_geomean(benchmarks, raw_data, rel_data): + """Compute the geometric mean and count the number of data points for the + supplied benchmarks, raw and optionally relative data. 
Return a + list of geometric mean and count of data, with a.""" + + geomean = 1.0 + count = 0.0 + + for bench in benchmarks: + if gp['absolute']: + # Want absolute results. Ignore zero values + if bench in raw_data: + if raw_data[bench] > 0: + count += 1 + geomean *= raw_data[bench] + else: + # Want relative results (the default). Ignore zero value + if bench in rel_data: + if rel_data[bench] > 0: + count += 1 + geomean *= rel_data[bench] + + if count > 0.0: + geomean = pow(geomean, 1.0 / count) + + return geomean, count + + +def compute_geosd(benchmarks, raw_data, rel_data, geomean, count): + """Compute geometric standard deviation for the given set of benchmarks, + using the supplied raw and optinally relative data. This draws on the + previously computed geometric mean and count for each benchmark. + + Return geometric standard deviation.""" + lnsize = 0.0 + geosd = 0.0 + + for bench in benchmarks: + if gp['absolute']: + # Want absolute results + if raw_data[bench] > 0.0: + lnsize += math.pow(math.log(raw_data[bench] / geomean), 2) + else: + # Want relative results (the default). + if rel_data[bench] > 0.0: + lnsize += math.pow(math.log(rel_data[bench] / geomean), 2) + + # Compute the standard deviation using the lnsize data for each benchmark. + if count > 0.0: + geosd = math.exp(math.sqrt(lnsize / count)) + + return geosd + + +def compute_georange(geomean, geosd, count): + """Compute the geometric range of one geometric standard deviation around + the geometric mean. Return the geometric range.""" + + georange = 0.0 + + if count > 0: + if geosd > 0.0: + georange = geomean * geosd - geomean / geosd + else: + georange = 0.0 + + return georange + + +def embench_stats(benchmarks, raw_data, rel_data): + """Compute statistics summary for Embench.""" + geomean, count = compute_geomean(benchmarks, raw_data, rel_data) + geosd = compute_geosd(benchmarks, raw_data, rel_data, geomean, count) + georange = compute_georange(geomean, geosd, count) + return geomean, geosd, georange + + +def arglist_to_str(arglist): + """Make arglist into a string""" + + for arg in arglist: + if arg == arglist[0]: + str = arg + else: + str = str + ' ' + arg + + return str diff --git a/pylib/run_gdbserver_sim.py b/pylib/run_gdbserver_sim.py new file mode 100644 index 0000000..9979ee5 --- /dev/null +++ b/pylib/run_gdbserver_sim.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 + +# Python module to run programs on a gdbserver with simulator + +# Copyright (C) 2019 Embecosm Limited +# +# Contributor: Jeremy Bennett +# +# This file is part of Embench. + +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Embench module to run benchmark programs. + +This version is suitable for a gdbserver with simulator. +""" + +import argparse +import re +import subprocess + +from embench_core import log + + +def get_target_args(remnant): + """Parse left over arguments""" + parser = argparse.ArgumentParser(description='Get target specific args') + + parser.add_argument( + '--gdb-command', + type=str, + default='gdb', + help='Command to invoke GDB', + ) + parser.add_argument( + '--gdbserver-command', + type=str, + default='gdbserver', + help='Command to invoke the GDB server', + ) + parser.add_argument( + '--gdbserver-target', + type=str, + default='ri5cy', + help='target argument to gdbserver', + ) + + return parser.parse_args(remnant) + + +def build_benchmark_cmd(path, args): + """Construct the command to run the benchmark. 
"args" is a + namespace with target specific arguments""" + + cmd = [f'{args.gdb_command}'] + gdb_comms = [ + 'set confirm off', + 'set style enabled off', + 'set height 0', + 'file {0}', + f'target remote | {args.gdbserver_command} ' + + f'-c {args.gdbserver_target} --stdin', + 'stepi', + 'stepi', + 'load', + 'break start_trigger', + 'break stop_trigger', + 'break _exit', + 'jump *_start', + 'monitor cyclecount', + 'continue', + 'monitor cyclecount', + 'continue', + 'print $a0', + 'detach', + 'quit', + ] + + for arg in gdb_comms: + cmd.extend(['-ex', arg.format(path)]) + + return cmd + + +def decode_results(stdout_str, stderr_str): + """Extract the results from the output string of the run. Return the + elapsed time in milliseconds or zero if the run failed.""" + # Return code is in standard output. We look for the string that means we + # hit a breakpoint on _exit, then for the string returning the value. + rcstr = re.search( + 'Breakpoint 3,.*\$1 = (\d+)', stdout_str, re.S + ) + if not rcstr: + log.debug('Warning: Failed to find return code') + return 0.0 + + # The start and end cycle counts are in the stderr string + times = re.search('(\d+)\D+(\d+)', stderr_str, re.S) + if times: + ms_elapsed = float(int(times.group(2)) - int(times.group(1))) / 1000.0 + return ms_elapsed + + # We must have failed to find a time + log.debug('Warning: Failed to find timing') + return 0.0 + +def run_benchmark(bench, path, args): + """Runs the benchmark "bench" at "path". "args" is a namespace + with target specific arguments. This function will be called + in parallel unless if the number of tasks is limited via + command line. "run_benchmark" should return the result in + milliseconds. + """ + arglist = build_benchmark_cmd(path, args) + try: + res = subprocess.run( + arglist, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=50, + ) + except subprocess.TimeoutExpired: + log.warning(f'Warning: Run of {bench} timed out.') + return None + if res.returncode != 0: + return None + return decode_results(res.stdout.decode('utf-8'), res.stderr.decode('utf-8')) diff --git a/pylib/run_native.py b/pylib/run_native.py new file mode 100644 index 0000000..8639ece --- /dev/null +++ b/pylib/run_native.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 + +# Python module to run programs natively. + +# Copyright (C) 2019 Clemson University +# +# Contributor: Ola Jeppsson +# +# This file is part of Embench. + +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Embench module to run benchmark programs. + +This version is suitable for running programs natively. +""" + +import argparse +import subprocess +import re + +from embench_core import log + + +def get_target_args(remnant): + """Parse left over arguments""" + parser = argparse.ArgumentParser(description='Get target specific args') + + # No target arguments + return parser.parse_args(remnant) + +def decode_results(stdout_str, stderr_str): + """Extract the results from the output string of the run. Return the + elapsed time in milliseconds or zero if the run failed.""" + # See above in build_benchmark_cmd how we record the return value and + # execution time. Return code is in standard output. Execution time is in + # standard error. + + # Match "RET=rc" + rcstr = re.search('^RET=(\d+)', stdout_str, re.S | re.M) + if not rcstr: + log.debug('Warning: Failed to find return code') + return None + + # Match "real s.mm?m?" 
+ time = re.search('^real (\d+)[.](\d+)', stderr_str, re.S) + if time: + ms_elapsed = int(time.group(1)) * 1000 + \ + int(time.group(2).ljust(3,'0')) # 0-pad + # Return value cannot be zero (will be interpreted as error) + return max(float(ms_elapsed), 0.001) + + # We must have failed to find a time + log.debug('Warning: Failed to find timing') + return None + +def run_benchmark(bench, path, args): + """Runs the benchmark "bench" at "path". "args" is a namespace + with target specific arguments. This function will be called + in parallel unless if the number of tasks is limited via + command line. "run_benchmark" should return the result in + milliseconds. + """ + + try: + res = subprocess.run( + ['sh', '-c', 'time -p ' + path + '; echo RET=$?'], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=50, + ) + except subprocess.TimeoutExpired: + log.warning(f'Warning: Run of {bench} timed out.') + return None + if res.returncode != 0: + return None + return decode_results(res.stdout.decode('utf-8'), res.stderr.decode('utf-8')) diff --git a/pylib/run_stm32f4-discovery.py b/pylib/run_stm32f4-discovery.py new file mode 100644 index 0000000..9967fab --- /dev/null +++ b/pylib/run_stm32f4-discovery.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 + +# Python module to run programs on a stm32f4-discovery board + +# Copyright (C) 2019 Embecosm Limited +# +# Contributor: Jeremy Bennett +# +# This file is part of Embench. + +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Embench module to run benchmark programs. + +This version is suitable for a gdbserver with simulator. +""" + +__all__ = [ + 'get_target_args', + 'run_benchmark', +] + +import argparse +import subprocess +import re + +from embench_core import log + +cpu_mhz = 1 + +def get_target_args(remnant): + """Parse left over arguments""" + parser = argparse.ArgumentParser(description='Get target specific args') + + parser.add_argument( + '--gdb-command', + type=str, + default='gdb', + help='Command to invoke GDB', + ) + parser.add_argument( + '--gdbserver-command', + type=str, + default='gdbserver', + help='Command to invoke the GDB server', + ) + + return parser.parse_args(remnant) + + +def build_benchmark_cmd(path, args): + """Construct the command to run the benchmark. "args" is a + namespace with target specific arguments""" + cmd = [f'{args.gdb_command}'] + gdb_comms = [ + 'set confirm off', + 'file {0}', + 'target extended-remote :3333', + 'load', + 'delete breakpoints', + 'break start_trigger', + 'break stop_trigger', + 'break AtExit', + 'continue', + 'print /u *0xe0001004', + 'continue', + 'print /u *0xe0001004', + 'continue', + 'print /u $r0', + 'quit', + ] + + for arg in gdb_comms: + cmd.extend(['-ex', arg.format(path)]) + + return cmd + + +def decode_results(stdout_str, args): + """Extract the results from the output string of the run. Return the + elapsed time in milliseconds or zero if the run failed.""" + # Return code is in standard output. We look for the string that means we + # hit a breakpoint on _exit, then for the string returning the value. 
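The run_native.py hunk above times the benchmark with POSIX `time -p` and recovers milliseconds from the `real` line on stderr. A small sketch of that parsing step, using an invented stderr string:

import re

# Invented `time -p` stderr; the real run appends "RET=$?" to stdout separately.
stderr_str = 'real 0.12\nuser 0.10\nsys 0.01\n'

m = re.search(r'^real (\d+)[.](\d+)', stderr_str, re.S)
ms = int(m.group(1)) * 1000 + int(m.group(2).ljust(3, '0'))  # 0.12 s -> 120 ms
print(max(float(ms), 0.001))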
+ rcstr = re.search( + 'Breakpoint 3,.*\$3 = (\d+)', stdout_str, re.S + ) + if not rcstr: + log.debug('Warning: Failed to find return code') + return 0.0 + if int(rcstr.group(1)) != 0: + log.debug('Warning: Error return code') + + # The start and end cycle counts are in the stdout string + starttime = re.search('\$1 = (\d+)', stdout_str, re.S) + endtime = re.search('\$2 = (\d+)', stdout_str, re.S) + if not starttime or not endtime: + log.debug('Warning: Failed to find timing') + return 0.0 + + # Time from cycles to milliseconds + cycles = int(endtime.group(1)) - int(starttime.group(1)) + return cycles / args.cpu_mhz / 1000.0 + +def run_benchmark(bench, path, args): + """Runs the benchmark "bench" at "path". "args" is a namespace + with target specific arguments. This function will be called + in parallel unless if the number of tasks is limited via + command line. "run_benchmark" should return the result in + milliseconds. + """ + arglist = build_benchmark_cmd(path, args) + try: + res = subprocess.run( + arglist, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=50, + ) + except subprocess.TimeoutExpired: + log.warning(f'Warning: Run of {bench} timed out.') + return None + if res.returncode != 0: + print ('Non-zero return code') + return None + return decode_results(res.stdout.decode('utf-8'), args) diff --git a/pylib/run_wally.py b/pylib/run_wally.py new file mode 100644 index 0000000..96febaf --- /dev/null +++ b/pylib/run_wally.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 + +# Python module to open and decode results from Wally. + +# Copyright (C) 2022 Embecosm Limited and University of Bristol +# +# Contributor: Daniel Torres +# +# This file is part of Embench. + +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Embench module to run benchmark programs. + +This version is suitable for running programs on wally. +""" + +import argparse +import subprocess +import os +import configparser +import re + +from embench_core import log + +cpu_mhz = 1 + +def get_target_args(remnant): + """Parse left over arguments""" + parser = argparse.ArgumentParser(description='Get target specific args') + + # No target arguments + parser.add_argument( + '--cpu-mhz', + type=int, + default=1, + help='Processor clock speed in MHz' + ) + + return parser.parse_args(remnant) + + +def build_benchmark_cmd(path, args): + """Construct the command to run the benchmark. "args" is a + namespace with target specific arguments""" + global cpu_mhz + cpu_mhz = args.cpu_mhz + # to run wally, we use the modelsim that inputs the compiled C code and outputs a .outputfile + # that contains the content of begin_signature, which writes the instret & cycles of begin & end triggers + # along with the return code, which tells us if the test passed + log.debug("\"" + path + "\" : cycles, insret, CPI, Elapsed Time, ClkFreq") + return ['sh', '-c', (f'cat {os.path.dirname(path)}*.output')] + +def decode_results(stdout_str, stderr_str): + """Extract the results from the output string of the run. 
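In the run_stm32f4-discovery.py hunk above, 0xe0001004 is the Cortex-M DWT cycle counter (DWT_CYCCNT); decode_results turns the two sampled counter values into milliseconds. A worked sketch with invented counts and an assumed 168 MHz core clock:

# Invented DWT_CYCCNT samples and an assumed 168 MHz clock.
cpu_mhz = 168
start_cycles = 1_000
end_cycles = 8_163_000

cycles = end_cycles - start_cycles
ms_elapsed = cycles / cpu_mhz / 1000.0   # cycles -> microseconds -> milliseconds
print(f'{cycles} cycles -> {ms_elapsed:.2f} ms')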
Return the + elapsed time in milliseconds or zero if the run failed.""" + # this reads in the output of the buildbench_cmd command, in this case we have 5 lines written to stdout_str + # that contains the content of begin_signature, which writes the instret & cycles of begin & end triggers + # along with the return code, which tells us if the test passed + output_signature = stdout_str.split('\n')[0:6] + if (len(output_signature)): + pc_trigger = list(map(lambda s: int(s,16), output_signature[0:5])) + else: + log.debug('Warning: Output file empty') + result = 0.0 + + # get the cpu_mhz from input variable of benchmark_speed.py + global cpu_mhz + # check if either pc value is the default (i.e. never got written to) + if (pc_trigger[4]!=1): + log.debug('Warning: Simulation returned failure in signature') + if ((pc_trigger[1]==0)|(pc_trigger[0]==0)): + log.debug('Warning: Failed to find timing') + result = 0.0 + else: + result = ((pc_trigger)[1]-(pc_trigger)[0]) / cpu_mhz / 1000.0 + + # log.debug('Simulation returned %d. 1 is Success, 3 is Failure', pc_trigger[4]) + # cycles, #insret, #CPI, Elapsed Time, ClkFreq + log.debug( "[" + str((pc_trigger)[1]-(pc_trigger)[0]) + "," + str(pc_trigger[3]-pc_trigger[2]) + "," + str((pc_trigger[1]-pc_trigger[0]) / (pc_trigger[3]-pc_trigger[2])) + "," + str(result) + "," + str(cpu_mhz) + "],") + + return (result) + +def run_benchmark(bench, path, args): + """Runs the benchmark "bench" at "path". "args" is a namespace + with target specific arguments. This function will be called + in parallel unless if the number of tasks is limited via + command line. "run_benchmark" should return the result in + milliseconds. + """ + arglist = build_benchmark_cmd(path, args) + try: + res = subprocess.run( + arglist, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=50, + ) + except subprocess.TimeoutExpired: + log.warning(f'Warning: Run of {bench} timed out.') + return None + if res.returncode != 0: + return None + return decode_results(res.stdout.decode('utf-8'), res.stderr.decode('utf-8')) diff --git a/runcmds.txt b/runcmds.txt deleted file mode 100644 index af114f7..0000000 --- a/runcmds.txt +++ /dev/null @@ -1,36 +0,0 @@ -Native compile/run ------------------------------------------- -make run TEST=biquad_cascade_df2T_f32 CFG=sos3_n128 -make run TEST=biquad_cascade_df2T_f32 CFG=sos3_n1 -make run TEST=dct4_512_f32 CFG=default -make run TEST=dct4_2048_f32 CFG=default -make run TEST=fir_f32 CFG=taps256_n128 -make run TEST=fir_f32 CFG=taps256_n1 -make run TEST=rfft512_f32 CFG=default -make run TEST=rfft2048_f32 CFG=default -make run TEST=hello - -Cross-compile for STM32F4 Discovery board ------------------------------------------- -speed -===== -make rebuild TEST=biquad_cascade_df2T_f32 CFG=sos3_n128 TARGET=stm32f4discovery TOOL=armgcc OPT=speed -make rebuild TEST=biquad_cascade_df2T_f32 CFG=sos3_n1 TARGET=stm32f4discovery TOOL=armgcc OPT=speed -make rebuild TEST=dct4_512_f32 CFG=default TARGET=stm32f4discovery TOOL=armgcc OPT=speed -make rebuild TEST=dct4_2048_f32 CFG=default TARGET=stm32f4discovery TOOL=armgcc OPT=speed -make rebuild TEST=fir_f32 CFG=taps256_n128 TARGET=stm32f4discovery TOOL=armgcc OPT=speed -make rebuild TEST=fir_f32 CFG=taps256_n1 TARGET=stm32f4discovery TOOL=armgcc OPT=speed -make rebuild TEST=rfft512_f32 CFG=default TARGET=stm32f4discovery TOOL=armgcc OPT=speed -make rebuild TEST=rfft2048_f32 CFG=default TARGET=stm32f4discovery TOOL=armgcc OPT=speed - -size -===== -make rebuild TEST=biquad_cascade_df2T_f32 CFG=sos3_n128 
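The run_wally.py hunk above reads five hex values dumped from begin_signature; the way decode_results indexes them, the first pair is the begin/end cycle counts, the second pair the begin/end instret counts, and the last value is a status word (1 means pass). A decoding sketch with invented values and an assumed 100 MHz clock:

# Invented signature lines; field order follows how decode_results indexes them.
lines = ['000003e8', '0001adb0', '00000320', '00013880', '00000001']
vals = [int(s, 16) for s in lines]

cpu_mhz = 100                          # assumed clock for this sketch
cycles = vals[1] - vals[0]             # 109000
instret = vals[3] - vals[2]            # 79200
print('cycles', cycles, 'CPI', round(cycles / instret, 3),
      'ms', cycles / cpu_mhz / 1000.0, 'pass', vals[4] == 1)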
TARGET=stm32f4discovery TOOL=armgcc OPT=size -make rebuild TEST=biquad_cascade_df2T_f32 CFG=sos3_n1 TARGET=stm32f4discovery TOOL=armgcc OPT=size -make rebuild TEST=dct4_512_f32 CFG=default TARGET=stm32f4discovery TOOL=armgcc OPT=size -make rebuild TEST=dct4_2048_f32 CFG=default TARGET=stm32f4discovery TOOL=armgcc OPT=size -make rebuild TEST=fir_f32 CFG=taps256_n128 TARGET=stm32f4discovery TOOL=armgcc OPT=size -make rebuild TEST=fir_f32 CFG=taps256_n1 TARGET=stm32f4discovery TOOL=armgcc OPT=size -make rebuild TEST=rfft512_f32 CFG=default TARGET=stm32f4discovery TOOL=armgcc OPT=size -make rebuild TEST=rfft2048_f32 CFG=default TARGET=stm32f4discovery TOOL=armgcc OPT=size - diff --git a/sconstruct.py b/sconstruct.py new file mode 100644 index 0000000..9b208e0 --- /dev/null +++ b/sconstruct.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 + +# Script to build all benchmarks + +# Copyright (C) 2017, 2024 Embecosm Limited +# +# Contributor: Konrad Moron +# +# This file is part of Embench. + +# SPDX-License-Identifier: GPL-3.0-or-later + +from pathlib import Path +import os + +def find_benchmarks(bd, env): + dir_iter = Path('src').iterdir() + return ([bench for bench in dir_iter if bench.is_dir()] + [env['dummy_benchmark']]) + +def parse_options(): + num_cpu = int(os.environ.get('NUM_CPU', 2)) + SetOption('num_jobs', num_cpu) + AddOption('--build-dir', nargs=1, type='string', default='bd') + AddOption('--config-dir', nargs=1, type='string', default='config2') + config_dir = Path(GetOption('config_dir')).absolute() + bd = Path(GetOption('build_dir')).absolute() + + vars = Variables(None, ARGUMENTS) + print(ARGUMENTS) + vars.Add('cc', default=env['CC']) + vars.Add('cflags', default=env['CCFLAGS']) + vars.Add('ld', default=env['LINK']) + vars.Add('ldflags', default=env['LINKFLAGS']) + vars.Add('user_libs', default=[]) + vars.Add('warmup_heat', default=1, + help='Number of iterations to warm up caches before measurements') + vars.Add('gsf', default=1, help='Global scale factor') + vars.Add('dummy_benchmark', default=(bd / 'support/dummy-benchmark')) + return vars + +def setup_directories(bd, config_dir): + VariantDir(bd / "src", "src") + VariantDir(bd / "support", "support") + VariantDir(bd / "config", config_dir) + SConsignFile(bd / ".sconsign.dblite") + +def populate_build_env(env, vars): + vars.Update(env) + env.Append(CPPDEFINES={ 'WARMUP_HEAT' : '${warmup_heat}', + 'GLOBAL_SCALE_FACTOR' : '${gsf}'}) + env.Append(CPPPATH=['support', config_dir]) + env.Replace(CCFLAGS = "${cflags}") + env.Replace(LINKFLAGS = "${ldflags}") + env.Replace(CC = "${cc}") + env.Replace(LINK = "${ld}") + print(f"{env['user_libs']}".split()) + env.Prepend(LIBS = f"{env['user_libs']}".split()) + +def build_support_objects(env): + support_objects = [] + support_objects += env.Object(Glob(str(bd / "support" / "*.c"))) + support_objects += env.Object(Glob(str(bd / "support" / "BasicMathFunctions" / "*.c"))) + support_objects += env.Object(Glob(str(bd / "support" / "CommonTables" / "*.c"))) + support_objects += env.Object(Glob(str(bd / "support" / "ComplexMathFunctions" / "*.c"))) + support_objects += env.Object(Glob(str(bd / "support" / "dsp" / "*.c"))) + support_objects += env.Object(Glob(str(bd / "support" / "FilteringFunctions" / "*.c"))) + support_objects += env.Object(Glob(str(bd / "support" / "TransformFunctions" / "*.c"))) + support_objects += env.Object(str(bd / "config/boardsupport.c")) + env.Default(support_objects) + return support_objects + + +# MAIN BUILD SCRIPT +#env = DefaultEnvironment() +env = 
Environment(ENV=os.environ.copy()) +vars = parse_options() + +bd = Path(GetOption('build_dir')).absolute() +config_dir = Path(GetOption('config_dir')).absolute() + +setup_directories(bd, config_dir) +env.Replace(BUILD_DIR=bd) +env.Replace(CONFIG_DIR=config_dir) +populate_build_env(env, vars) + +# Setup Help Text +env.Help("\nCustomizable Variables:", append=True) +env.Help(vars.GenerateHelpText(env), append=True) + +support_objects = build_support_objects(env) +benchmark_paths = find_benchmarks(bd, env) + +benchmark_objects = { + (bd / bench / bench.name): env.Object(Glob(str(bd / bench / "*.c"))) + + for bench in benchmark_paths +} +env.Default(benchmark_objects.values()) + +for benchname, objects in benchmark_objects.items(): + bench_exe = env.Program(str(benchname), objects + support_objects) + env.Default(bench_exe) diff --git a/source/CommonTables/CommonTables.c b/source/CommonTables/CommonTables.c deleted file mode 100644 index 6170b9c..0000000 --- a/source/CommonTables/CommonTables.c +++ /dev/null @@ -1,30 +0,0 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: CommonTables.c - * Description: Combination of all common table source files. - * - * $Date: 08. January 2020 - * $Revision: V1.1.0 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
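One plausible way to drive the SConstruct above from a wrapper script; the option and variable names come from parse_options(), while the toolchain, directory names and NUM_CPU value are only examples:

import os
import subprocess

# Example invocation; adjust tool names and directories for the actual target.
env = dict(os.environ, NUM_CPU='8')             # parse_options() reads NUM_CPU
subprocess.run(
    ['scons', '-f', 'sconstruct.py',
     '--build-dir=bd-armgcc', '--config-dir=config',
     'cc=arm-none-eabi-gcc', 'ld=arm-none-eabi-gcc',
     'cflags=-O2 -ffunction-sections -fdata-sections',
     'ldflags=-Wl,--gc-sections',
     'warmup_heat=1', 'gsf=1'],
    check=True, env=env)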
- */ - -#include "arm_common_tables.c" -#include "arm_const_structs.c" diff --git a/tests/biquad_cascade_df2T_f32/cfg/sos3_n1/coeff.c b/src/biquad_cascade_df2T_f32_sos3_n1/coeff.c similarity index 100% rename from tests/biquad_cascade_df2T_f32/cfg/sos3_n1/coeff.c rename to src/biquad_cascade_df2T_f32_sos3_n1/coeff.c diff --git a/tests/biquad_cascade_df2T_f32/cfg/sos3_n1/data.h b/src/biquad_cascade_df2T_f32_sos3_n1/data.h similarity index 100% rename from tests/biquad_cascade_df2T_f32/cfg/sos3_n1/data.h rename to src/biquad_cascade_df2T_f32_sos3_n1/data.h diff --git a/tests/biquad_cascade_df2T_f32/cfg/sos3_n1/in.c b/src/biquad_cascade_df2T_f32_sos3_n1/in.c similarity index 100% rename from tests/biquad_cascade_df2T_f32/cfg/sos3_n1/in.c rename to src/biquad_cascade_df2T_f32_sos3_n1/in.c diff --git a/tests/biquad_cascade_df2T_f32/cfg/sos3_n1/out.c b/src/biquad_cascade_df2T_f32_sos3_n1/out.c similarity index 100% rename from tests/biquad_cascade_df2T_f32/cfg/sos3_n1/out.c rename to src/biquad_cascade_df2T_f32_sos3_n1/out.c diff --git a/tests/biquad_cascade_df2T_f32/test_main.c b/src/biquad_cascade_df2T_f32_sos3_n1/test_main.c similarity index 100% rename from tests/biquad_cascade_df2T_f32/test_main.c rename to src/biquad_cascade_df2T_f32_sos3_n1/test_main.c diff --git a/tests/biquad_cascade_df2T_f32/test_main.h b/src/biquad_cascade_df2T_f32_sos3_n1/test_main.h similarity index 100% rename from tests/biquad_cascade_df2T_f32/test_main.h rename to src/biquad_cascade_df2T_f32_sos3_n1/test_main.h diff --git a/tests/biquad_cascade_df2T_f32/cfg/sos3_n128/coeff.c b/src/biquad_cascade_df2T_f32_sos3_n128/coeff.c similarity index 100% rename from tests/biquad_cascade_df2T_f32/cfg/sos3_n128/coeff.c rename to src/biquad_cascade_df2T_f32_sos3_n128/coeff.c diff --git a/tests/biquad_cascade_df2T_f32/cfg/sos3_n128/data.h b/src/biquad_cascade_df2T_f32_sos3_n128/data.h similarity index 100% rename from tests/biquad_cascade_df2T_f32/cfg/sos3_n128/data.h rename to src/biquad_cascade_df2T_f32_sos3_n128/data.h diff --git a/tests/biquad_cascade_df2T_f32/cfg/sos3_n128/in.c b/src/biquad_cascade_df2T_f32_sos3_n128/in.c similarity index 100% rename from tests/biquad_cascade_df2T_f32/cfg/sos3_n128/in.c rename to src/biquad_cascade_df2T_f32_sos3_n128/in.c diff --git a/tests/biquad_cascade_df2T_f32/cfg/sos3_n128/out.c b/src/biquad_cascade_df2T_f32_sos3_n128/out.c similarity index 100% rename from tests/biquad_cascade_df2T_f32/cfg/sos3_n128/out.c rename to src/biquad_cascade_df2T_f32_sos3_n128/out.c diff --git a/src/biquad_cascade_df2T_f32_sos3_n128/test_main.c b/src/biquad_cascade_df2T_f32_sos3_n128/test_main.c new file mode 100644 index 0000000..388630a --- /dev/null +++ b/src/biquad_cascade_df2T_f32_sos3_n128/test_main.c @@ -0,0 +1,89 @@ +#include +#include + +// includes for every benchmark +#include "boardsupport.h" + +// includes for this benchmark +#include "test_main.h" +#include "snr.h" + +/** + * @brief Filter output + */ + +static float32_t output[N_SAMPLES]; + + + +/** + * @brief Filter state + */ + +static float32_t filter_state [2*N_STAGES]; + + +/** + * @brief test_main + * + */ +int __attribute__ ((used)) test_main (int argc __attribute__ ((unused)), char *argv[] __attribute__ ((unused))) +{ + uint32_t ccnt; + uint32_t fail_count = 0; + float32_t output_initial[N_INITIAL]; + + // filter initialization + arm_biquad_cascade_df2T_instance_f32 filter_S; + arm_biquad_cascade_df2T_init_f32(&filter_S, N_STAGES, coeff, filter_state); + + // ignore the noisy outputs due to initial conditions + 
arm_biquad_cascade_df2T_f32(&filter_S, input, output_initial, N_INITIAL); + + // begin profiling + start_trigger(); + + arm_biquad_cascade_df2T_f32(&filter_S, input + N_INITIAL, output, N_SAMPLES); + + // end profiling + stop_trigger(); + + // get the cycle count + ccnt = get_ccnt(); + + #ifndef NO_SNR_CHECK + // calculate SNR of test output vs matlab reference output + float32_t snr; + snr = snr_f32(output_ref, output, N_SAMPLES); + + // check correctness (if reference and actual filter outputs matched) + fail_count += (snr < SNR_REF_THLD); + + printf("SNR = %i\n", (int)snr); + + // print output vs reference + // for (uint16_t i = 0; i < N_SAMPLES; i++) + // { + // printf("output[%d]=%f, expected=%f\n", i, output[i], output_ref[i]); + // } + + // print to a python list, useful for debug + // uint32_t ptr; + // for (ptr = 0; ptr < N_SAMPLES; ptr++) + // { + // if (ptr == 0) + // printf("output = [%f, ", *(output + ptr)); + // else if (ptr == (N_SAMPLES-1)) + // printf("%f]\n", *(output + ptr)); + // else + // printf("%f,", *(output + ptr)); + // } + #endif + + if (fail_count) + printf("TEST FAIL\nCCNT = %i\n", ccnt); + else + printf("TEST PASS\nCCNT = %i\n", ccnt); + + return !(fail_count == 0); +} diff --git a/tests/dct4_2048_f32/test_main.h b/src/biquad_cascade_df2T_f32_sos3_n128/test_main.h similarity index 100% rename from tests/dct4_2048_f32/test_main.h rename to src/biquad_cascade_df2T_f32_sos3_n128/test_main.h diff --git a/tests/dct4_2048_f32/cfg/default/data.h b/src/dct4_2048_f32/data.h similarity index 100% rename from tests/dct4_2048_f32/cfg/default/data.h rename to src/dct4_2048_f32/data.h diff --git a/tests/dct4_2048_f32/cfg/default/in.c b/src/dct4_2048_f32/in.c similarity index 100% rename from tests/dct4_2048_f32/cfg/default/in.c rename to src/dct4_2048_f32/in.c diff --git a/tests/dct4_2048_f32/cfg/default/out.c b/src/dct4_2048_f32/out.c similarity index 100% rename from tests/dct4_2048_f32/cfg/default/out.c rename to src/dct4_2048_f32/out.c diff --git a/tests/dct4_2048_f32/test_main.c b/src/dct4_2048_f32/test_main.c similarity index 100% rename from tests/dct4_2048_f32/test_main.c rename to src/dct4_2048_f32/test_main.c diff --git a/tests/dct4_512_f32/test_main.h b/src/dct4_2048_f32/test_main.h similarity index 100% rename from tests/dct4_512_f32/test_main.h rename to src/dct4_2048_f32/test_main.h diff --git a/tests/dct4_512_f32/cfg/default/data.h b/src/dct4_512_f32/data.h similarity index 100% rename from tests/dct4_512_f32/cfg/default/data.h rename to src/dct4_512_f32/data.h diff --git a/tests/dct4_512_f32/cfg/default/in.c b/src/dct4_512_f32/in.c similarity index 100% rename from tests/dct4_512_f32/cfg/default/in.c rename to src/dct4_512_f32/in.c diff --git a/tests/dct4_512_f32/cfg/default/out.c b/src/dct4_512_f32/out.c similarity index 100% rename from tests/dct4_512_f32/cfg/default/out.c rename to src/dct4_512_f32/out.c diff --git a/tests/dct4_512_f32/test_main.c b/src/dct4_512_f32/test_main.c similarity index 100% rename from tests/dct4_512_f32/test_main.c rename to src/dct4_512_f32/test_main.c diff --git a/tests/fir_f32/test_main.h b/src/dct4_512_f32/test_main.h similarity index 100% rename from tests/fir_f32/test_main.h rename to src/dct4_512_f32/test_main.h diff --git a/tests/fir_f32/cfg/taps256_n1/coeff.c b/src/fir_f32_taps256_n1/coeff.c similarity index 100% rename from tests/fir_f32/cfg/taps256_n1/coeff.c rename to src/fir_f32_taps256_n1/coeff.c diff --git a/tests/fir_f32/cfg/taps256_n1/data.h b/src/fir_f32_taps256_n1/data.h similarity index 100% rename from 
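The test_main.c above gates pass/fail on snr_f32() against SNR_REF_THLD; the generator scripts further down in this diff compute the matching reference SNR as 10*log10(signal energy / error energy). A toy version of that calculation:

import numpy as np

# Toy reference/test vectors; real thresholds come from the generated data.h.
ref = np.array([0.5, -0.25, 0.125, 0.0625], dtype=np.float32)
tst = ref + np.float32(1e-4) * np.array([1, -1, 1, -1], dtype=np.float32)

snr_db = 10.0 * np.log10(np.sum(ref ** 2) / np.sum((ref - tst) ** 2))
print(f'SNR = {snr_db:.1f} dB')   # roughly 69 dB for this toy data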
tests/fir_f32/cfg/taps256_n1/data.h rename to src/fir_f32_taps256_n1/data.h diff --git a/tests/fir_f32/cfg/taps256_n1/in.c b/src/fir_f32_taps256_n1/in.c similarity index 100% rename from tests/fir_f32/cfg/taps256_n1/in.c rename to src/fir_f32_taps256_n1/in.c diff --git a/tests/fir_f32/cfg/taps256_n1/out.c b/src/fir_f32_taps256_n1/out.c similarity index 100% rename from tests/fir_f32/cfg/taps256_n1/out.c rename to src/fir_f32_taps256_n1/out.c diff --git a/tests/fir_f32/test_main.c b/src/fir_f32_taps256_n1/test_main.c similarity index 100% rename from tests/fir_f32/test_main.c rename to src/fir_f32_taps256_n1/test_main.c diff --git a/tests/rfft2048_f32/test_main.h b/src/fir_f32_taps256_n1/test_main.h similarity index 100% rename from tests/rfft2048_f32/test_main.h rename to src/fir_f32_taps256_n1/test_main.h diff --git a/tests/fir_f32/cfg/taps256_n128/coeff.c b/src/fir_f32_taps256_n128/coeff.c similarity index 100% rename from tests/fir_f32/cfg/taps256_n128/coeff.c rename to src/fir_f32_taps256_n128/coeff.c diff --git a/tests/fir_f32/cfg/taps256_n128/data.h b/src/fir_f32_taps256_n128/data.h similarity index 100% rename from tests/fir_f32/cfg/taps256_n128/data.h rename to src/fir_f32_taps256_n128/data.h diff --git a/tests/fir_f32/cfg/taps256_n128/in.c b/src/fir_f32_taps256_n128/in.c similarity index 100% rename from tests/fir_f32/cfg/taps256_n128/in.c rename to src/fir_f32_taps256_n128/in.c diff --git a/tests/fir_f32/cfg/taps256_n128/out.c b/src/fir_f32_taps256_n128/out.c similarity index 100% rename from tests/fir_f32/cfg/taps256_n128/out.c rename to src/fir_f32_taps256_n128/out.c diff --git a/src/fir_f32_taps256_n128/test_main.c b/src/fir_f32_taps256_n128/test_main.c new file mode 100644 index 0000000..9bb4512 --- /dev/null +++ b/src/fir_f32_taps256_n128/test_main.c @@ -0,0 +1,73 @@ +#include +#include + +// includes for every benchmark +#include "boardsupport.h" + +// includes for this benchmark +#include "test_main.h" +#include "snr.h" + +/** + * @brief Filter state and output + */ + +static float32_t output[N_SAMPLES]; +static float32_t filter_state [N_TAPS + N_SAMPLES - 1]; + + +/** + * @brief test_main + * + */ +int __attribute__ ((used)) test_main (int argc __attribute__ ((unused)), char *argv[] __attribute__ ((unused))) +{ + uint32_t ccnt; + uint32_t fail_count = 0; + float32_t output_initial[N_TAPS]; + uint32_t ptr; + + // filter initialization + arm_fir_instance_f32 filter_S; + arm_fir_init_f32(&filter_S, N_TAPS, coeff, filter_state, N_SAMPLES); + + // ignore the first N_TAPS outputs (bad output based on zero initial state) + for (ptr = 0; ptr < (N_TAPS/N_SAMPLES); ptr++) + { + arm_fir_f32(&filter_S, input + (ptr * N_SAMPLES), output_initial + (ptr * N_SAMPLES), N_SAMPLES); + } + + // begin profiling + start_trigger(); + + arm_fir_f32(&filter_S, (input + N_TAPS), output, N_SAMPLES); + + // end profiling + stop_trigger(); + + // get the cycle count + ccnt = get_ccnt(); + + #ifndef NO_SNR_CHECK + // calculate SNR of test output vs golden reference + float32_t snr; + snr = snr_f32(output_ref, output, N_SAMPLES); + + printf("SNR=%d\n", (int)snr); + + // for (uint16_t i = 0; i < N_SAMPLES; i++) + // { + // printf("output[%d]=%f, expected=%f\n", i, output[i], output_ref[i]); + // } + + // check correctness (if reference and actual filter outputs matched) + fail_count += (snr < SNR_REF_THLD); + #endif + + if (fail_count) + printf("TEST FAIL\n"); + else + printf("TEST PASS\nCCNT = %i\n", ccnt); + + return !(fail_count == 0); +} diff --git a/tests/rfft512_f32/test_main.h 
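A note on the buffer sizes in the FIR test_main.c above, for the taps256_n128 configuration used in this diff: CMSIS-DSP's arm_fir_f32 needs a state buffer of numTaps + blockSize - 1 samples, and the warm-up loop discards N_TAPS/N_SAMPLES full blocks before profiling starts.

# Bookkeeping for the taps256_n128 configuration.
N_TAPS = 256
N_SAMPLES = 128

state_len = N_TAPS + N_SAMPLES - 1        # arm_fir_f32 state buffer length
warmup_blocks = N_TAPS // N_SAMPLES       # warm-up blocks discarded before profiling
print(state_len, warmup_blocks)           # 383 2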
b/src/fir_f32_taps256_n128/test_main.h similarity index 100% rename from tests/rfft512_f32/test_main.h rename to src/fir_f32_taps256_n128/test_main.h diff --git a/tests/rfft2048_f32/cfg/default/data.h b/src/rfft2048_f32/data.h similarity index 100% rename from tests/rfft2048_f32/cfg/default/data.h rename to src/rfft2048_f32/data.h diff --git a/tests/rfft2048_f32/cfg/default/in.c b/src/rfft2048_f32/in.c similarity index 100% rename from tests/rfft2048_f32/cfg/default/in.c rename to src/rfft2048_f32/in.c diff --git a/tests/rfft2048_f32/cfg/default/out.c b/src/rfft2048_f32/out.c similarity index 100% rename from tests/rfft2048_f32/cfg/default/out.c rename to src/rfft2048_f32/out.c diff --git a/tests/rfft2048_f32/test_main.c b/src/rfft2048_f32/test_main.c similarity index 100% rename from tests/rfft2048_f32/test_main.c rename to src/rfft2048_f32/test_main.c diff --git a/src/rfft2048_f32/test_main.h b/src/rfft2048_f32/test_main.h new file mode 100644 index 0000000..cd73895 --- /dev/null +++ b/src/rfft2048_f32/test_main.h @@ -0,0 +1,6 @@ +#ifndef _TEST_MAIN_H_ +#define _TEST_MAIN_H_ + +#include "data.h" + +#endif /* _TEST_MAIN_H_ */ \ No newline at end of file diff --git a/tests/rfft512_f32/cfg/default/data.h b/src/rfft512_f32/data.h similarity index 100% rename from tests/rfft512_f32/cfg/default/data.h rename to src/rfft512_f32/data.h diff --git a/tests/rfft512_f32/cfg/default/in.c b/src/rfft512_f32/in.c similarity index 100% rename from tests/rfft512_f32/cfg/default/in.c rename to src/rfft512_f32/in.c diff --git a/tests/rfft512_f32/cfg/default/out.c b/src/rfft512_f32/out.c similarity index 100% rename from tests/rfft512_f32/cfg/default/out.c rename to src/rfft512_f32/out.c diff --git a/tests/rfft512_f32/test_main.c b/src/rfft512_f32/test_main.c similarity index 100% rename from tests/rfft512_f32/test_main.c rename to src/rfft512_f32/test_main.c diff --git a/src/rfft512_f32/test_main.h b/src/rfft512_f32/test_main.h new file mode 100644 index 0000000..cd73895 --- /dev/null +++ b/src/rfft512_f32/test_main.h @@ -0,0 +1,6 @@ +#ifndef _TEST_MAIN_H_ +#define _TEST_MAIN_H_ + +#include "data.h" + +#endif /* _TEST_MAIN_H_ */ \ No newline at end of file diff --git a/stm32cube_project.zip b/stm32cube_project.zip deleted file mode 100644 index 8b933dc..0000000 Binary files a/stm32cube_project.zip and /dev/null differ diff --git a/source/BasicMathFunctions/arm_mult_f32.c b/support/BasicMathFunctions/arm_mult_f32.c similarity index 100% rename from source/BasicMathFunctions/arm_mult_f32.c rename to support/BasicMathFunctions/arm_mult_f32.c diff --git a/source/BasicMathFunctions/arm_scale_f32.c b/support/BasicMathFunctions/arm_scale_f32.c similarity index 100% rename from source/BasicMathFunctions/arm_scale_f32.c rename to support/BasicMathFunctions/arm_scale_f32.c diff --git a/source/CommonTables/arm_common_tables.c b/support/CommonTables/arm_common_tables.c similarity index 100% rename from source/CommonTables/arm_common_tables.c rename to support/CommonTables/arm_common_tables.c diff --git a/source/CommonTables/arm_const_structs.c b/support/CommonTables/arm_const_structs.c similarity index 100% rename from source/CommonTables/arm_const_structs.c rename to support/CommonTables/arm_const_structs.c diff --git a/source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c b/support/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c similarity index 100% rename from source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c rename to support/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c diff --git 
a/source/FilteringFunctions/arm_biquad_cascade_df2T_f32.c b/support/FilteringFunctions/arm_biquad_cascade_df2T_f32.c similarity index 100% rename from source/FilteringFunctions/arm_biquad_cascade_df2T_f32.c rename to support/FilteringFunctions/arm_biquad_cascade_df2T_f32.c diff --git a/source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c b/support/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c similarity index 100% rename from source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c rename to support/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c diff --git a/source/FilteringFunctions/arm_fir_f32.c b/support/FilteringFunctions/arm_fir_f32.c similarity index 100% rename from source/FilteringFunctions/arm_fir_f32.c rename to support/FilteringFunctions/arm_fir_f32.c diff --git a/source/FilteringFunctions/arm_fir_init_f32.c b/support/FilteringFunctions/arm_fir_init_f32.c similarity index 100% rename from source/FilteringFunctions/arm_fir_init_f32.c rename to support/FilteringFunctions/arm_fir_init_f32.c diff --git a/source/TransformFunctions/arm_bitreversal2.c b/support/TransformFunctions/arm_bitreversal2.c similarity index 100% rename from source/TransformFunctions/arm_bitreversal2.c rename to support/TransformFunctions/arm_bitreversal2.c diff --git a/source/TransformFunctions/arm_cfft_f32.c b/support/TransformFunctions/arm_cfft_f32.c similarity index 100% rename from source/TransformFunctions/arm_cfft_f32.c rename to support/TransformFunctions/arm_cfft_f32.c diff --git a/source/TransformFunctions/arm_cfft_init_f32.c b/support/TransformFunctions/arm_cfft_init_f32.c similarity index 100% rename from source/TransformFunctions/arm_cfft_init_f32.c rename to support/TransformFunctions/arm_cfft_init_f32.c diff --git a/source/TransformFunctions/arm_cfft_radix8_f32.c b/support/TransformFunctions/arm_cfft_radix8_f32.c similarity index 100% rename from source/TransformFunctions/arm_cfft_radix8_f32.c rename to support/TransformFunctions/arm_cfft_radix8_f32.c diff --git a/source/TransformFunctions/arm_dct4_f32.c b/support/TransformFunctions/arm_dct4_f32.c similarity index 100% rename from source/TransformFunctions/arm_dct4_f32.c rename to support/TransformFunctions/arm_dct4_f32.c diff --git a/source/TransformFunctions/arm_rfft_fast_f32.c b/support/TransformFunctions/arm_rfft_fast_f32.c similarity index 100% rename from source/TransformFunctions/arm_rfft_fast_f32.c rename to support/TransformFunctions/arm_rfft_fast_f32.c diff --git a/source/TransformFunctions/arm_rfft_fast_init_f32.c b/support/TransformFunctions/arm_rfft_fast_init_f32.c similarity index 100% rename from source/TransformFunctions/arm_rfft_fast_init_f32.c rename to support/TransformFunctions/arm_rfft_fast_init_f32.c diff --git a/include/arm_common_tables.h b/support/arm_common_tables.h similarity index 100% rename from include/arm_common_tables.h rename to support/arm_common_tables.h diff --git a/include/arm_const_structs.h b/support/arm_const_structs.h similarity index 100% rename from include/arm_const_structs.h rename to support/arm_const_structs.h diff --git a/include/arm_math.h b/support/arm_math.h similarity index 100% rename from include/arm_math.h rename to support/arm_math.h diff --git a/include/arm_math_types.h b/support/arm_math_types.h similarity index 100% rename from include/arm_math_types.h rename to support/arm_math_types.h diff --git a/include/dsp/basic_math_functions.h b/support/dsp/basic_math_functions.h similarity index 100% rename from include/dsp/basic_math_functions.h rename to 
support/dsp/basic_math_functions.h diff --git a/include/dsp/complex_math_functions.h b/support/dsp/complex_math_functions.h similarity index 100% rename from include/dsp/complex_math_functions.h rename to support/dsp/complex_math_functions.h diff --git a/include/dsp/fast_math_functions.h b/support/dsp/fast_math_functions.h similarity index 100% rename from include/dsp/fast_math_functions.h rename to support/dsp/fast_math_functions.h diff --git a/include/dsp/filtering_functions.h b/support/dsp/filtering_functions.h similarity index 100% rename from include/dsp/filtering_functions.h rename to support/dsp/filtering_functions.h diff --git a/include/dsp/none.h b/support/dsp/none.h similarity index 100% rename from include/dsp/none.h rename to support/dsp/none.h diff --git a/include/dsp/support_functions.h b/support/dsp/support_functions.h similarity index 100% rename from include/dsp/support_functions.h rename to support/dsp/support_functions.h diff --git a/include/dsp/transform_functions.h b/support/dsp/transform_functions.h similarity index 100% rename from include/dsp/transform_functions.h rename to support/dsp/transform_functions.h diff --git a/include/dsp/utils.h b/support/dsp/utils.h similarity index 100% rename from include/dsp/utils.h rename to support/dsp/utils.h diff --git a/support/dummy-benchmark/dummy.c b/support/dummy-benchmark/dummy.c new file mode 100644 index 0000000..8f06ae7 --- /dev/null +++ b/support/dummy-benchmark/dummy.c @@ -0,0 +1,14 @@ +/* Common dummy benchmark + + Copyright (C) 2018-2024 Embecosm Limited + + Contributor: Konrad Moron + + SPDX-License-Identifier: GPL-3.0-or-later */ + +/* This is just a wrapper for the board specific support file. */ +int __attribute__ ((used, noinline)) test_main (int argc __attribute__ ((unused)), char *argv[] __attribute__ ((unused))) { + return 0; +} + +#undef MAGIC \ No newline at end of file diff --git a/main.c b/support/main.c similarity index 86% rename from main.c rename to support/main.c index 4811b28..4af3b27 100644 --- a/main.c +++ b/support/main.c @@ -1,7 +1,7 @@ #include #include -#include "boardsupport.h" +#include "support.h" extern int test_main(); @@ -18,7 +18,5 @@ int __attribute__ ((used)) main (int argc __attribute__ ((unused)), char *argv[] res = test_main(); - while(1){}; - return res; } diff --git a/common/snr.c b/support/snr.c similarity index 100% rename from common/snr.c rename to support/snr.c diff --git a/common/snr.h b/support/snr.h similarity index 100% rename from common/snr.h rename to support/snr.h diff --git a/targets/boardsupport.h b/support/support.h similarity index 83% rename from targets/boardsupport.h rename to support/support.h index c07da83..c58a501 100644 --- a/targets/boardsupport.h +++ b/support/support.h @@ -7,12 +7,13 @@ SPDX-License-Identifier: GPL-3.0-or-later */ -#ifndef _BOARDSUPPORT_H_ -#define _BOARDSUPPORT_H_ +#ifndef _SUPPORT_H_ +#define _SUPPORT_H_ void init_board(void); void start_trigger(void); void stop_trigger(void); int get_ccnt(void); -#endif // _BOARDSUPPORT_H_ +#endif + diff --git a/targets/native/target.mk b/targets/native/target.mk deleted file mode 100644 index 4ec2c5a..0000000 --- a/targets/native/target.mk +++ /dev/null @@ -1,37 +0,0 @@ -############################################################## -# INIT -############################################################## - -TGT_SRC := -TGT_INC := -TGT_DEF := -TGT_OBJ := - -# may be initialized/modified elsewhere with global settings -TGT_FLG += - - -############################################################## -# 
EXTERNAL -############################################################## - - -############################################################## -# TARGET -############################################################## - -### DEFINES -TGT_DEF += - -### INCLUDES -TGT_INC +=-I $(TGT_DIR)/.. - -### FLAGS -TGT_FLG +=$(TGT_DEF) -TGT_LD += - -### SOURCES -TGT_SRC +=$(TGT_DIR)/boardsupport.c - -### OBJECTS -TGT_OBJ +=$(patsubst %.c,%.o, $(patsubst %.S,%.o,$(notdir $(TGT_SRC)))) diff --git a/tests/biquad_cascade_df2T_f32/python/biquad_sos3.py b/tests/biquad_cascade_df2T_f32/python/biquad_sos3.py deleted file mode 100755 index 836a9c4..0000000 --- a/tests/biquad_cascade_df2T_f32/python/biquad_sos3.py +++ /dev/null @@ -1,229 +0,0 @@ -#!/usr/bin/env python3 - -import matplotlib.pyplot as plt -import numpy as np -import os - -from scipy import signal - -# USER: select number of samples -# desired clean output signal length (beyond initial noisy output) -N = 128 - -# USER: script settings -en_plots = False -en_filegen = False - - -# fixed seed for reproducibility -np.random.seed(42) - -# path to current test directory -tst_path = os.path.realpath(__file__) -tst_path = tst_path[:tst_path.find("python")] - -# generated file path and names -fpath = os.path.join(tst_path, 'cfg') -input_fnm = "in.c" -output_fnm = "out.c" -coeff_fnm = "coeff.c" -header_fnm = "data.h" -# C array names -input_arr_nm = "input" -coeff_arr_nm = "coeff" -output_arr_nm = "output_ref" - - -def fwrite_array_f32(fname, arr_name, arr_size, arr, per_line): - """ Write array to C file """ - outdir, f = os.path.split(fname) - if not os.path.exists(outdir): - os.makedirs(outdir) - float_fmt = '{:13.10f}' # 13 digits total, 10 decimal points floating-point - sz = len(arr) - with open(fname, 'w') as f: - f.write("\n#include \"data.h\"") - f.write("\n\nfloat32_t {:}[{:}] = \n{{\n ".format(arr_name, arr_size)) - for i in range(sz): - if i == (sz - 1): - f.write("{:}f\n".format(float_fmt.format(arr[i]))) - else: - f.write("{:}f, ".format(float_fmt.format(arr[i]))) - if ((i+1) % per_line) == 0: - f.write("\n ") - f.write("};\n") - - -def fwrite_header(fname, n_sos, t_samples, n_samples, snr_ref, input_arr_nm, output_arr_nm, coeff_arr_nm): - outdir, f = os.path.split(fname) - if not os.path.exists(outdir): - os.makedirs(outdir) - with open(fname, 'w') as f: - f.write("\n#ifndef DATA_H\n#define DATA_H\n") - f.write("\n#include \"arm_math.h\"\n") - f.write("\n") - f.write("#define N_STAGES ({:d})\n".format(n_sos)) - f.write("#define TOTAL_SAMPLES ({:d})\n".format(t_samples)) - f.write("#define N_SAMPLES ({:d})\n".format(n_samples)) - f.write("#define N_INITIAL ({:d})\n".format(t_samples-n_samples)) - f.write("#define SNR_REF_THLD ({:d})\n".format(snr_ref)) - f.write("\n") - f.write("extern float32_t {:}[N_STAGES*5];\n".format(coeff_arr_nm)) - f.write("extern float32_t {:}[TOTAL_SAMPLES];\n".format(input_arr_nm)) - f.write("extern float32_t {:}[N_SAMPLES];\n".format(output_arr_nm)) - f.write("\n#endif // DATA_H\n") - - -def snr_32b(ref, tst): - """ Calculate the SNR in 32-bit floating-point precision. 
""" - energy_sig = np.float32(0) - energy_err = np.float32(0) - ref_32b = ref.astype(np.float32) - tst_32b = tst.astype(np.float32) - for ri, ti in zip(ref_32b, tst_32b): - energy_sig += np.power(ri, np.float32(2)) - energy_err += np.power(np.subtract(ri, ti), np.float32(2)) - snr = np.float32(10) * np.log10(np.divide(energy_sig, energy_err)) - return snr - - -# def snr(tst, ref): -# sz = min(len(tst), len(ref)) -# energy_tst = np.sum(tst * tst) -# energy_err = np.sum((ref - tst) * (ref - tst)) -# snr = 10 * np.log10(energy_tst / energy_err) -# return snr - - -def moving_average(a, n=3): - ret = np.cumsum(a, dtype=float) - ret[n:] = ret[n:] - ret[:-n] - return ret[n - 1:] / n - - -""" Filter Design """ -order = 6 # IIR filter order -n_sos = order//2 # number of second-order stages -fs = 48000 # sample frequency (Hz) -ftype = 'butter' # filter type -btype = 'lowpass' # filter type -fc = 1000 # cutoff frequency (Hz) - -sos = signal.iirfilter(N=order, Wn=fc, btype=btype, analog=False, ftype=ftype, output='sos', fs=fs) - -# print(len(sos), type(sos)) -# print(sos) - - -""" Input Stimulus """ -N_long = 1024 # starting signal length - -n = np.arange(N_long) # sample indices -t = n / fs # discrete time - -tone_freq_hz = [100, 4000, 8000] # input tones -tone_amp_dB = [-30, -20, -10] # input tone powers -en_noise = False # optionally include zero-mean, unit std WGN -noise_dB = -20 # noise power - -# generate and sum up the pure tones -x_pure = (np.power(10,(tone_amp_dB[0]/20))) * np.sin(2 * np.pi * tone_freq_hz[0] * t) -for i in range(1, len(tone_freq_hz)): - x_pure += (np.power(10,(tone_amp_dB[i]/20))) * np.sin(2 * np.pi * tone_freq_hz[i] * t) - -# optionally add zero-mean gaussian white noise -if en_noise: - x = x_pure + (np.power(10, (noise_dB/20)) * np.random.normal(0, 1, N_long)) -else: - x = x_pure - -# print(len(x), type(x)) - - -""" Filtered Output """ -# The filter function is implemented as a series of second-order filters with direct-form II transposed structure. 
-y = signal.sosfilt(sos, x) -# print(len(y), type(y)) - -# approximate phase delay for 100 hz tone output -x_100hz = (np.power(10,(tone_amp_dB[0]/20))) * np.sin(2 * np.pi * tone_freq_hz[0] * t) -correlation = signal.correlate(y, x_100hz, mode="full") -lags = signal.correlation_lags(y.size, x_100hz.size, mode="full") -delay = lags[np.argmax(correlation)] - -# ignore noisy output from initial conditions -diff = np.abs(y[delay:] - x_100hz[:-delay]) -diff = moving_average(diff, n=32) -N_initial = np.where(diff < 0.00015)[0][0] - - -""" Pack SOS according to the CMSIS spec """ -# each row is [b0, b1, b2, a0, a1, a2] -# make sure that a0 = 1 (divide each row by a0) -coeff = sos / sos[:,3].reshape(3,1) -# remove the a0 column and flip the signs of a1 and a2 -coeff = np.hstack((coeff[:,:3], -coeff[:,4:])) -# remove the total gain from the first section -# and distribute it across all sections -total_gain = coeff[0,0] -sec_gain = np.power(total_gain, 1/n_sos) -coeff[0,:3] = coeff[0,:3] / total_gain -coeff[:,:3] = coeff[:,:3] * sec_gain -# flatten the array (# stages * 5 coeffs per stage) -coeff = np.reshape(coeff, n_sos*5) - - -""" Calculate the expected SNR of 32b precision result """ -x_32b = x.astype(np.float32) -sos_32b = sos.astype(np.float32) -y_32b = signal.sosfilt(sos_32b, x_32b) -snr = snr_32b(y, y_32b) -# the cmsis implementation seems to introduce some small extra loss of precision -snr_ref = int(snr) - 5 - - -""" Plot """ -if en_plots: - delay_t = delay / fs - - plt.figure(figsize = (12, 6)) - plt.subplot(121) - plt.plot(n, x) # original signal - plt.plot(n, y, 'r') # filtered - plt.xlabel('sample') - plt.ylabel('amplitude') - plt.legend(['x', 'y']) - plt.title('Input and Output') - - # Plot the good part of the filtered signal vs a pure 100hz tone - plt.subplot(122) - plt.plot(t, x_100hz) # pure 100hz tone - plt.plot(t[N_initial:]-delay_t, y[N_initial:], 'r') # the "good" part of the filtered signal - plt.xlabel('time') - plt.ylabel('amplitude') - plt.legend(['pure tone', 'y (good part)']) - plt.title('Filter Ouput vs Pure Tone') - - plt.show() - - -""" Write to file """ -if en_filegen: - input = x[:N_initial + N] - output_ref = y[N_initial:N_initial + N] - - fpath = os.path.join(fpath, 'sos{:}_n{:}'.format(n_sos, N)) - - fname = os.path.join(fpath, coeff_fnm) - fwrite_array_f32(fname, arr_name=coeff_arr_nm, arr=coeff, arr_size='N_STAGES*5', per_line=8) - - fname = os.path.join(fpath, output_fnm) - fwrite_array_f32(fname, arr_name=output_arr_nm, arr=output_ref, arr_size='N_SAMPLES', per_line=8) - - fname = os.path.join(fpath, input_fnm) - fwrite_array_f32(fname, arr_name=input_arr_nm, arr=input, arr_size='TOTAL_SAMPLES', per_line=8) - - fname = os.path.join(fpath, header_fnm) - fwrite_header(fname, snr_ref=snr_ref, n_sos=n_sos, - t_samples=(N+N_initial), n_samples=N, - input_arr_nm=input_arr_nm, output_arr_nm=output_arr_nm, coeff_arr_nm=coeff_arr_nm) diff --git a/tests/biquad_cascade_df2T_f32/test.mk b/tests/biquad_cascade_df2T_f32/test.mk deleted file mode 100644 index 7233a00..0000000 --- a/tests/biquad_cascade_df2T_f32/test.mk +++ /dev/null @@ -1,58 +0,0 @@ -############################################################## -# INIT -############################################################## - -TST_SRC := -TST_INC := -TST_DEF := -TST_OBJ := - -# may be initialized/modified elsewhere with global settings -TST_FLG += - - -############################################################## -# EXTERNAL -############################################################## - -### global options for 
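The coefficient packing in biquad_sos3.py above reorders each scipy SOS row into the five-per-stage layout {b0, b1, b2, -a1, -a2} that the CMSIS biquad expects, with a0 normalised to 1 and the overall gain spread across the stages. A condensed sketch of those steps:

import numpy as np
from scipy import signal

# Condensed version of the packing done in biquad_sos3.py.
sos = signal.iirfilter(N=6, Wn=1000, btype='lowpass', ftype='butter',
                       output='sos', fs=48000)          # 3 second-order stages

coeff = sos / sos[:, 3].reshape(-1, 1)                  # force a0 == 1
coeff = np.hstack((coeff[:, :3], -coeff[:, 4:]))        # keep b0,b1,b2, negate a1,a2
total_gain = coeff[0, 0]                                # the script treats stage 0's b0 as the overall gain
sec_gain = total_gain ** (1 / sos.shape[0])
coeff[0, :3] /= total_gain                              # strip it from stage 0 ...
coeff[:, :3] *= sec_gain                                # ... and share it across all stages
coeff = coeff.reshape(-1)                               # 5 coefficients per stage
print(coeff.shape)                                      # (15,)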
all library users -include $(ROOT)/dsp_inc.mk -TST_INC +=$(DSP_INC) -TST_DEF +=$(DSP_DEF) - -### snr checker -TST_INC +=-I $(CMN_DIR) -TST_SRC +=$(CMN_DIR)/snr.c - - -############################################################## -# TEST -############################################################## - -ifeq ($(CFG),) - CFG =sos3_n128 - $(warning WARNING: CFG not set for biquad_cascade_df2T_f32. Defaulting to CFG=sos3_n18) -endif - -### DEFINES -TST_DEF += - -### INCLUDES -TST_INC +=-I $(TST_DIR) -TST_INC +=-I $(TST_DIR)/cfg/$(CFG) - -### FLAGS -TST_FLG +=$(TST_DEF) - -### SOURCES -TST_SRC +=$(TST_DIR)/test_main.c -TST_SRC +=$(DSP_DIR)/source/FilteringFunctions/arm_biquad_cascade_df2T_f32.c -TST_SRC +=$(DSP_DIR)/source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c - -# data and configuration files -TST_SRC +=$(TST_DIR)/cfg/$(CFG)/in.c -TST_SRC +=$(TST_DIR)/cfg/$(CFG)/coeff.c -TST_SRC +=$(TST_DIR)/cfg/$(CFG)/out.c - -### OBJECTS -TST_OBJ +=$(patsubst %.c,%.o, $(patsubst %.S,%.o,$(notdir $(TST_SRC)))) diff --git a/tests/dct4_2048_f32/python/dct4.py b/tests/dct4_2048_f32/python/dct4.py deleted file mode 100755 index 0a0a4b2..0000000 --- a/tests/dct4_2048_f32/python/dct4.py +++ /dev/null @@ -1,162 +0,0 @@ -#!/usr/bin/env python3 - -import matplotlib.pyplot as plt -import numpy as np -import os - -from scipy.fftpack import dct - - -# USER: script settings -en_plots = True -en_filegen = False - - -# fixed seed for reproducibility -np.random.seed(42) - -# path to current test directory -tst_path = os.path.realpath(__file__) -tst_path = tst_path[:tst_path.find("python")] - -# generated file path and names -fpath = os.path.join(tst_path, 'cfg', 'default') -input_fnm = "in.c" -output_fnm = "out.c" -header_fnm = "data.h" -# C array names -input_arr_nm = "inout" -output_arr_nm = "output_ref" - - -def fwrite_array_f32(fname, arr_name, arr_size, arr, per_line): - """ Write array to C file """ - outdir, f = os.path.split(fname) - if not os.path.exists(outdir): - os.makedirs(outdir) - float_fmt = '{:13.10f}' # 13 digits total, 10 decimal points floating-point - sz = len(arr) - with open(fname, 'w') as f: - f.write("\n#include \"data.h\"") - f.write("\n\nfloat32_t {:}[{:}] = \n{{\n ".format(arr_name, arr_size)) - for i in range(sz): - if i == (sz - 1): - f.write("{:}f\n".format(float_fmt.format(arr[i]))) - else: - f.write("{:}f, ".format(float_fmt.format(arr[i]))) - if ((i+1) % per_line) == 0: - f.write("\n ") - f.write("};\n") - - -def fwrite_header(fname, dct_size, snr_ref, input_arr_nm, output_arr_nm): - outdir, f = os.path.split(fname) - if not os.path.exists(outdir): - os.makedirs(outdir) - with open(fname, 'w') as f: - f.write("\n#ifndef DATA_H\n#define DATA_H\n") - f.write("\n#include \"arm_math.h\"\n") - f.write("\n") - f.write("#define DCT4_SIZE ({:d})\n".format(dct_size)) - f.write("#define SNR_REF_THLD ({:d})\n".format(snr_ref)) - f.write("\n") - f.write("extern float32_t {:}[DCT4_SIZE];\n".format(input_arr_nm)) - f.write("extern float32_t {:}[DCT4_SIZE];\n".format(output_arr_nm)) - f.write("\n#endif // DATA_H\n") - - -def snr_32b(ref, tst): - """ Calculate the SNR in 32-bit floating-point precision. 
""" - energy_sig = np.float32(0) - energy_err = np.float32(0) - ref_32b = ref.astype(np.float32) - tst_32b = tst.astype(np.float32) - for ri, ti in zip(ref_32b, tst_32b): - energy_sig += np.power(ri, np.float32(2)) - energy_err += np.power(np.subtract(ri, ti), np.float32(2)) - snr = np.float32(10) * np.log10(np.divide(energy_sig, energy_err)) - return snr - - -""" DCT Design """ -dct_size = 2048 # DCT size - - -""" Input Stimulus """ -fs = 48000 # sample frequency (Hz) -n = np.arange(dct_size) # sample indices -t = n / fs # discrete time -freq = (0.5*n / (dct_size/fs)) # discrete frequency - -tone_freq_hz = [100, 4000, 8000] # input tones -tone_amp_dB = [-30, -20, -10] # input tone powers -en_noise = True # optionally include zero-mean, unit std WGN -noise_dB = -25 # noise power - -# generate and sum up the pure tones -x_pure = (np.power(10,(tone_amp_dB[0]/20))) * np.sin(2 * np.pi * tone_freq_hz[0] * t) -for i in range(1, len(tone_freq_hz)): - x_pure += (np.power(10,(tone_amp_dB[i]/20))) * np.sin(2 * np.pi * tone_freq_hz[i] * t) - -# optionally add zero-mean gaussian white noise -if en_noise: - x = x_pure + (np.power(10, (noise_dB/20)) * np.random.normal(0, 1, dct_size)) -else: - x = x_pure - -# print(len(x), type(x)) - - -""" DCT Output """ -X = dct(x=x, type=4, n=dct_size, norm="ortho") - -# print(X, len(X)) - - - -""" Calculate the expected SNR of 32b precision result """ -x_32b = x.astype(np.float32) -X_32b = dct(x=x_32b, type=4, n=dct_size, norm="ortho") -snr = snr_32b(X, X_32b) -print("Expected 32b SNR = {:13.10f}".format(snr)) -# the cmsis implementation seems to introduce extra loss of precision -snr_ref = 100 - - -""" Plot """ -if en_plots: - nyq = (dct_size//2) - plt.figure(figsize = (12, 6)) - plt.subplot(121) - if en_noise: - plt.plot(n, x_pure) # original signal - plt.plot(n, x) # noisy signal - plt.legend(["x (original)", "x (noisy)"]) - else: - plt.plot(n, x) - plt.legend(["x"]) - plt.xlabel('sample') - plt.ylabel('amplitude') - plt.title('Input') - - # Plot the good part of the filtered signal vs a pure 100hz tone - plt.subplot(122) - plt.stem(freq, np.abs(X), 'b', markerfmt=" ", basefmt="-b") - plt.xlabel('frequency (Hz)') - plt.ylabel('|X(freq)|') - plt.title('DCT Spectrum') - - plt.tight_layout() - plt.show() - - -""" Write to file """ -if en_filegen: - fname = os.path.join(fpath, input_fnm) - fwrite_array_f32(fname, arr_name=input_arr_nm, arr=x, arr_size='DCT4_SIZE', per_line=8) - - fname = os.path.join(fpath, output_fnm) - fwrite_array_f32(fname, arr_name=output_arr_nm, arr=X, arr_size='DCT4_SIZE', per_line=8) - - fname = os.path.join(fpath, header_fnm) - fwrite_header(fname, dct_size=dct_size, snr_ref=snr_ref, input_arr_nm=input_arr_nm, output_arr_nm=output_arr_nm) diff --git a/tests/dct4_2048_f32/test.mk b/tests/dct4_2048_f32/test.mk deleted file mode 100644 index f2afb97..0000000 --- a/tests/dct4_2048_f32/test.mk +++ /dev/null @@ -1,66 +0,0 @@ -############################################################## -# INIT -############################################################## - -TST_SRC := -TST_INC := -TST_DEF := -TST_OBJ := - -# may be initialized/modified elsewhere with global settings -TST_FLG += - - -############################################################## -# EXTERNAL -############################################################## - -### global options for all library users -include $(ROOT)/dsp_inc.mk -TST_INC +=$(DSP_INC) -TST_DEF +=$(DSP_DEF) - -### snr checker -TST_INC +=-I $(CMN_DIR) -TST_SRC +=$(CMN_DIR)/snr.c - - 
-############################################################## -# TEST -############################################################## - -ifeq ($(CFG),) - CFG =default - $(warning WARNING: CFG not set for dct4_512_f32. Defaulting to CFG=default.) -endif - -### DEFINES -TST_DEF += - -### INCLUDES -TST_INC +=-I $(TST_DIR) -TST_INC +=-I $(TST_DIR)/cfg/$(CFG) - -### FLAGS -TST_FLG +=$(TST_DEF) - -### SOURCES -TST_SRC +=$(TST_DIR)/test_main.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_dct4_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_rfft_fast_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_rfft_fast_init_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_cfft_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_cfft_init_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_cfft_radix8_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_bitreversal2.c -TST_SRC +=$(DSP_DIR)/source/BasicMathFunctions/arm_mult_f32.c -TST_SRC +=$(DSP_DIR)/source/BasicMathFunctions/arm_scale_f32.c -TST_SRC +=$(DSP_DIR)/source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c -TST_SRC +=$(DSP_DIR)/source/CommonTables/CommonTables.c - -# data and configuration files -TST_SRC +=$(TST_DIR)/cfg/$(CFG)/in.c -TST_SRC +=$(TST_DIR)/cfg/$(CFG)/out.c - -### OBJECTS -TST_OBJ +=$(patsubst %.c,%.o, $(patsubst %.S,%.o,$(notdir $(TST_SRC)))) diff --git a/tests/dct4_512_f32/python/dct4.py b/tests/dct4_512_f32/python/dct4.py deleted file mode 100755 index 4a4015d..0000000 --- a/tests/dct4_512_f32/python/dct4.py +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/env python3 - -import matplotlib.pyplot as plt -import numpy as np -import os - -from scipy.fftpack import dct - - -# USER: script settings -en_plots = True -en_filegen = False - - -# fixed seed for reproducibility -np.random.seed(42) - -# path to current test directory -tst_path = os.path.realpath(__file__) -tst_path = tst_path[:tst_path.find("python")] - -# generated file path and names -fpath = os.path.join(tst_path, 'cfg', 'default') -input_fnm = "in.c" -output_fnm = "out.c" -header_fnm = "data.h" -# C array names -input_arr_nm = "inout" -output_arr_nm = "output_ref" - - -def fwrite_array_f32(fname, arr_name, arr_size, arr, per_line): - """ Write array to C file """ - outdir, f = os.path.split(fname) - if not os.path.exists(outdir): - os.makedirs(outdir) - float_fmt = '{:13.10f}' # 13 digits total, 10 decimal points floating-point - sz = len(arr) - with open(fname, 'w') as f: - f.write("\n#include \"data.h\"") - f.write("\n\nfloat32_t {:}[{:}] = \n{{\n ".format(arr_name, arr_size)) - for i in range(sz): - if i == (sz - 1): - f.write("{:}f\n".format(float_fmt.format(arr[i]))) - else: - f.write("{:}f, ".format(float_fmt.format(arr[i]))) - if ((i+1) % per_line) == 0: - f.write("\n ") - f.write("};\n") - - -def fwrite_header(fname, dct_size, snr_ref, input_arr_nm, output_arr_nm): - outdir, f = os.path.split(fname) - if not os.path.exists(outdir): - os.makedirs(outdir) - with open(fname, 'w') as f: - f.write("\n#ifndef DATA_H\n#define DATA_H\n") - f.write("\n#include \"arm_math.h\"\n") - f.write("\n") - f.write("#define DCT4_SIZE ({:d})\n".format(dct_size)) - f.write("#define SNR_REF_THLD ({:d})\n".format(snr_ref)) - f.write("\n") - f.write("extern float32_t {:}[DCT4_SIZE];\n".format(input_arr_nm)) - f.write("extern float32_t {:}[DCT4_SIZE];\n".format(output_arr_nm)) - f.write("\n#endif // DATA_H\n") - - -def snr_32b(ref, tst): - """ Calculate the SNR in 32-bit floating-point precision. 
""" - energy_sig = np.float32(0) - energy_err = np.float32(0) - ref_32b = ref.astype(np.float32) - tst_32b = tst.astype(np.float32) - for ri, ti in zip(ref_32b, tst_32b): - energy_sig += np.power(ri, np.float32(2)) - energy_err += np.power(np.subtract(ri, ti), np.float32(2)) - snr = np.float32(10) * np.log10(np.divide(energy_sig, energy_err)) - return snr - - -""" DCT Design """ -dct_size = 512 # DCT size - - -""" Input Stimulus """ -fs = 48000 # sample frequency (Hz) -n = np.arange(dct_size) # sample indices -t = n / fs # discrete time -freq = (0.5*n / (dct_size/fs)) # discrete frequency - -tone_freq_hz = [100, 4000, 8000] # input tones -tone_amp_dB = [-30, -20, -10] # input tone powers -en_noise = True # optionally include zero-mean, unit std WGN -noise_dB = -25 # noise power - -# generate and sum up the pure tones -x_pure = (np.power(10,(tone_amp_dB[0]/20))) * np.sin(2 * np.pi * tone_freq_hz[0] * t) -for i in range(1, len(tone_freq_hz)): - x_pure += (np.power(10,(tone_amp_dB[i]/20))) * np.sin(2 * np.pi * tone_freq_hz[i] * t) - -# optionally add zero-mean gaussian white noise -if en_noise: - x = x_pure + (np.power(10, (noise_dB/20)) * np.random.normal(0, 1, dct_size)) -else: - x = x_pure - -# print(len(x), type(x)) - - -""" DCT Output """ -X = dct(x=x, type=4, n=dct_size, norm="ortho") - -# print(X, len(X)) - - - -""" Calculate the expected SNR of 32b precision result """ -x_32b = x.astype(np.float32) -X_32b = dct(x=x_32b, type=4, n=dct_size, norm="ortho") -snr = snr_32b(X, X_32b) -# the cmsis implementation seems to introduce extra loss of precision -snr_ref = int(snr) - 20 - - -""" Plot """ -if en_plots: - nyq = (dct_size//2) - plt.figure(figsize = (12, 6)) - plt.subplot(121) - if en_noise: - plt.plot(n, x_pure) # original signal - plt.plot(n, x) # noisy signal - plt.legend(["x (original)", "x (noisy)"]) - else: - plt.plot(n, x) - plt.legend(["x"]) - plt.xlabel('sample') - plt.ylabel('amplitude') - plt.title('Input') - - # Plot the good part of the filtered signal vs a pure 100hz tone - plt.subplot(122) - plt.stem(freq, np.abs(X), 'b', markerfmt=" ", basefmt="-b") - plt.xlabel('frequency (Hz)') - plt.ylabel('|X(freq)|') - plt.title('DCT Spectrum') - - plt.tight_layout() - plt.show() - - -""" Write to file """ -if en_filegen: - fname = os.path.join(fpath, input_fnm) - fwrite_array_f32(fname, arr_name=input_arr_nm, arr=x, arr_size='DCT4_SIZE', per_line=8) - - fname = os.path.join(fpath, output_fnm) - fwrite_array_f32(fname, arr_name=output_arr_nm, arr=X, arr_size='DCT4_SIZE', per_line=8) - - fname = os.path.join(fpath, header_fnm) - fwrite_header(fname, dct_size=dct_size, snr_ref=snr_ref, input_arr_nm=input_arr_nm, output_arr_nm=output_arr_nm) diff --git a/tests/dct4_512_f32/test.mk b/tests/dct4_512_f32/test.mk deleted file mode 100644 index f2afb97..0000000 --- a/tests/dct4_512_f32/test.mk +++ /dev/null @@ -1,66 +0,0 @@ -############################################################## -# INIT -############################################################## - -TST_SRC := -TST_INC := -TST_DEF := -TST_OBJ := - -# may be initialized/modified elsewhere with global settings -TST_FLG += - - -############################################################## -# EXTERNAL -############################################################## - -### global options for all library users -include $(ROOT)/dsp_inc.mk -TST_INC +=$(DSP_INC) -TST_DEF +=$(DSP_DEF) - -### snr checker -TST_INC +=-I $(CMN_DIR) -TST_SRC +=$(CMN_DIR)/snr.c - - -############################################################## -# TEST 
-############################################################## - -ifeq ($(CFG),) - CFG =default - $(warning WARNING: CFG not set for dct4_512_f32. Defaulting to CFG=default.) -endif - -### DEFINES -TST_DEF += - -### INCLUDES -TST_INC +=-I $(TST_DIR) -TST_INC +=-I $(TST_DIR)/cfg/$(CFG) - -### FLAGS -TST_FLG +=$(TST_DEF) - -### SOURCES -TST_SRC +=$(TST_DIR)/test_main.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_dct4_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_rfft_fast_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_rfft_fast_init_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_cfft_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_cfft_init_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_cfft_radix8_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_bitreversal2.c -TST_SRC +=$(DSP_DIR)/source/BasicMathFunctions/arm_mult_f32.c -TST_SRC +=$(DSP_DIR)/source/BasicMathFunctions/arm_scale_f32.c -TST_SRC +=$(DSP_DIR)/source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c -TST_SRC +=$(DSP_DIR)/source/CommonTables/CommonTables.c - -# data and configuration files -TST_SRC +=$(TST_DIR)/cfg/$(CFG)/in.c -TST_SRC +=$(TST_DIR)/cfg/$(CFG)/out.c - -### OBJECTS -TST_OBJ +=$(patsubst %.c,%.o, $(patsubst %.S,%.o,$(notdir $(TST_SRC)))) diff --git a/tests/fir_f32/python/fir_taps256.py b/tests/fir_f32/python/fir_taps256.py deleted file mode 100755 index 257feb4..0000000 --- a/tests/fir_f32/python/fir_taps256.py +++ /dev/null @@ -1,193 +0,0 @@ -#!/usr/bin/env python3 - -import matplotlib.pyplot as plt -import numpy as np -import os - -from scipy import signal - - -# USER: select number of samples -# desired clean output signal length (beyond initial noisy output) -N = 128 - -# USER: script settings -en_plots = True -en_filegen = False - - -# fixed seed for reproducibility -np.random.seed(42) - -# path to current test directory -tst_path = os.path.realpath(__file__) -tst_path = tst_path[:tst_path.find("python")] - -# generated file path and names -fpath = os.path.join(tst_path, 'cfg') -input_fnm = "in.c" -output_fnm = "out.c" -coeff_fnm = "coeff.c" -header_fnm = "data.h" -# C array names -input_arr_nm = "input" -coeff_arr_nm = "coeff" -output_arr_nm = "output_ref" - - -def fwrite_array_f32(fname, arr_name, arr_size, arr, per_line): - """ Write array to C file """ - outdir, f = os.path.split(fname) - if not os.path.exists(outdir): - os.makedirs(outdir) - float_fmt = '{:13.10f}' # 13 digits total, 10 decimal points floating-point - sz = len(arr) - with open(fname, 'w') as f: - f.write("\n#include \"data.h\"") - f.write("\n\nfloat32_t {:}[{:}] = \n{{\n ".format(arr_name, arr_size)) - for i in range(sz): - if i == (sz - 1): - f.write("{:}f\n".format(float_fmt.format(arr[i]))) - else: - f.write("{:}f, ".format(float_fmt.format(arr[i]))) - if ((i+1) % per_line) == 0: - f.write("\n ") - f.write("};\n") - - -def fwrite_header(fname, n_taps, t_samples, n_samples, snr_ref, input_arr_nm, output_arr_nm, coeff_arr_nm): - outdir, f = os.path.split(fname) - if not os.path.exists(outdir): - os.makedirs(outdir) - with open(fname, 'w') as f: - f.write("\n#ifndef DATA_H\n#define DATA_H\n") - f.write("\n#include \"arm_math.h\"\n") - f.write("\n") - f.write("#define N_TAPS ({:d})\n".format(n_taps)) - f.write("#define TOTAL_SAMPLES ({:d})\n".format(t_samples)) - # f.write("#define N_INITIAL ({:d})\n".format(n_initial)) - f.write("#define N_SAMPLES ({:d})\n".format(n_samples)) - f.write("#define SNR_REF_THLD ({:d})\n".format(snr_ref)) - f.write("\n") - f.write("extern 
float32_t {:}[N_TAPS];\n".format(coeff_arr_nm)) - f.write("extern float32_t {:}[TOTAL_SAMPLES];\n".format(input_arr_nm)) - f.write("extern float32_t {:}[N_SAMPLES];\n".format(output_arr_nm)) - f.write("\n#endif // DATA_H\n") - - -def snr_32b(ref, tst): - """ Calculate the SNR in 32-bit floating-point precision. """ - energy_sig = np.float32(0) - energy_err = np.float32(0) - ref_32b = ref.astype(np.float32) - tst_32b = tst.astype(np.float32) - for ri, ti in zip(ref_32b, tst_32b): - energy_sig += np.power(ri, np.float32(2)) - energy_err += np.power(np.subtract(ri, ti), np.float32(2)) - snr = np.float32(10) * np.log10(np.divide(energy_sig, energy_err)) - return snr - - -""" Filter Design """ -n_taps = 256 # FIR filter order -fs = 48000 # sample frequency (Hz) -win = "hamming" # filter window type -fc = 1000 # cutoff frequency -scale = True # sampling flag - -coeff = signal.firwin(n_taps, fc, window=win, scale=scale, fs=fs) -# print(len(coeff), type(coeff)) -# print(coeff) - - -""" Input Stimulus """ -N_tot = n_taps + N # signal length - -n = np.arange(N_tot) # sample indices -t = n / fs # discrete time - -tone_freq_hz = [100, 4000, 8000] # input tones -tone_amp_dB = [-10, -20, -10] # input tone powers -en_noise = True # optionally include zero-mean, unit std WGN -noise_dB = -30 # noise power - -# generate and sum up the pure tones -x_pure = (np.power(10,(tone_amp_dB[0]/20))) * np.sin(2 * np.pi * tone_freq_hz[0] * t) -for i in range(1, len(tone_freq_hz)): - x_pure += (np.power(10,(tone_amp_dB[i]/20))) * np.sin(2 * np.pi * tone_freq_hz[i] * t) - -# optionally add zero-mean gaussian white noise -if en_noise: - x = x_pure + (np.power(10, (noise_dB/20)) * np.random.normal(0, 1, N_tot)) -else: - x = x_pure - -# print(len(x), type(x)) - - -""" Filtered Output """ -y = signal.lfilter(coeff, 1.0, x) -# print(len(y), type(y)) - - -""" Calculate the expected SNR of 32b precision result """ -x_32b = x.astype(np.float32) -coeff_32b = coeff.astype(np.float32) -y_32b = signal.lfilter(coeff_32b, np.float32(1.0), x_32b) -snr = snr_32b(y, y_32b) -# the cmsis implementation seems to introduce some extra loss of precision -snr_ref = int(snr) - 20 - - -""" Plot """ -if en_plots: - # The phase delay of the filtered signal. 
The first n_taps-1 - # samples are "corrupted" by the initial conditions - delay = 0.5 * (n_taps-1) / fs - - plt.figure(figsize = (12, 6)) - plt.subplot(121) - plt.plot(n, x) # original signal - plt.plot(n, y, 'r') # filtered - plt.xlabel('sample') - plt.ylabel('amplitude') - plt.legend(['x', 'y']) - plt.title('Input and Output') - - # plt.figure(2) - # plt.plot(t, x) # original signal - # plt.plot(t[n_taps-1:]-delay, y[n_taps-1:], 'r') # the "good" part of the filtered signal - # plt.xlabel('time') - # plt.legend(['x', 'filtered x (good part)']) - - # Plot the good part of the filtered signal vs a pure 100hz tone - x_100hz = (np.power(10,(tone_amp_dB[0]/20))) * np.sin(2 * np.pi * tone_freq_hz[0] * t) - plt.subplot(122) - plt.plot(t, x_100hz) # pure 100hz tone - plt.plot(t[n_taps-1:]-delay, y[n_taps-1:], 'r') # the "good" part of the filtered signal - plt.xlabel('time') - plt.ylabel('amplitude') - plt.legend(['pure tone', 'y (good part)']) - plt.title('Filter Ouput vs Pure Tone') - - plt.show() - - -""" Write to file """ -if en_filegen: - fpath = os.path.join(fpath, 'taps{:}_n{:}'.format(n_taps, N)) - - fname = os.path.join(fpath, coeff_fnm) - fwrite_array_f32(fname, arr_name=coeff_arr_nm, arr=coeff, arr_size='N_TAPS', per_line=8) - - fname = os.path.join(fpath, output_fnm) - fwrite_array_f32(fname, arr_name=output_arr_nm, arr=y[n_taps:], arr_size='N_SAMPLES', per_line=8) - - fname = os.path.join(fpath, input_fnm) - fwrite_array_f32(fname, arr_name=input_arr_nm, arr=x, arr_size='TOTAL_SAMPLES', per_line=8) - - fname = os.path.join(fpath, header_fnm) - fwrite_header(fname, n_taps=n_taps, - t_samples=N_tot, n_samples=N, - snr_ref=snr_ref, - input_arr_nm=input_arr_nm, output_arr_nm=output_arr_nm, coeff_arr_nm=coeff_arr_nm) diff --git a/tests/fir_f32/test.mk b/tests/fir_f32/test.mk deleted file mode 100644 index 64eb13c..0000000 --- a/tests/fir_f32/test.mk +++ /dev/null @@ -1,58 +0,0 @@ -############################################################## -# INIT -############################################################## - -TST_SRC := -TST_INC := -TST_DEF := -TST_OBJ := - -# may be initialized/modified elsewhere with global settings -TST_FLG += - - -############################################################## -# EXTERNAL -############################################################## - -### global options for all library users -include $(ROOT)/dsp_inc.mk -TST_INC +=$(DSP_INC) -TST_DEF +=$(DSP_DEF) - -### snr checker -TST_INC +=-I $(CMN_DIR) -TST_SRC +=$(CMN_DIR)/snr.c - - -############################################################## -# TEST -############################################################## - -ifeq ($(CFG),) - CFG =taps256_n128 - $(warning WARNING: CFG not set for fir_f32. 
Defaulting to CFG=taps256_n128) -endif - -### DEFINES -TST_DEF += - -### INCLUDES -TST_INC +=-I $(TST_DIR) -TST_INC +=-I $(TST_DIR)/cfg/$(CFG) - -### FLAGS -TST_FLG +=$(TST_DEF) - -### SOURCES -TST_SRC +=$(TST_DIR)/test_main.c -TST_SRC +=$(DSP_DIR)/source/FilteringFunctions/arm_fir_f32.c -TST_SRC +=$(DSP_DIR)/source/FilteringFunctions/arm_fir_init_f32.c - -# data files -TST_SRC +=$(TST_DIR)/cfg/$(CFG)/in.c -TST_SRC +=$(TST_DIR)/cfg/$(CFG)/coeff.c -TST_SRC +=$(TST_DIR)/cfg/$(CFG)/out.c - -### OBJECTS -TST_OBJ +=$(patsubst %.c,%.o, $(patsubst %.S,%.o,$(notdir $(TST_SRC)))) diff --git a/tests/hello/test.mk b/tests/hello/test.mk deleted file mode 100644 index 1dd3838..0000000 --- a/tests/hello/test.mk +++ /dev/null @@ -1,41 +0,0 @@ -############################################################## -# INIT -############################################################## - -TST_SRC := -TST_INC := -TST_DEF := -TST_OBJ := - -# may be initialized/modified elsewhere with global settings -TST_FLG += - - -############################################################## -# EXTERNAL -############################################################## - -### global options for all library users -include $(ROOT)/dsp_inc.mk -TST_INC += $(DSP_INC) -TST_DEF += $(DSP_DEF) - - -############################################################## -# TEST -############################################################## - -### DEFINES -TST_DEF += - -### INCLUDES -TST_INC += -I $(TST_DIR) - -### FLAGS -TST_FLG += $(TST_DEF) - -### SOURCES -TST_SRC += $(TST_DIR)/test_main.c - -### OBJECTS -TST_OBJ +=$(patsubst %.c,%.o, $(patsubst %.S,%.o,$(notdir $(TST_SRC)))) diff --git a/tests/hello/test_main.c b/tests/hello/test_main.c deleted file mode 100644 index 7d3731e..0000000 --- a/tests/hello/test_main.c +++ /dev/null @@ -1,20 +0,0 @@ -#include <stdio.h> -#include <stdint.h> - -#include "boardsupport.h" - -/** - * @brief test_main - * - */ -int test_main (void) -{ - start_trigger(); - printf("Hello, world.\n"); - stop_trigger(); - - uint32_t ccnt = get_ccnt(); - printf("CCNT = %i\n", ccnt); - - return 0; -} diff --git a/tests/rfft2048_f32/python/fft.py b/tests/rfft2048_f32/python/fft.py deleted file mode 100755 index 4af2c59..0000000 --- a/tests/rfft2048_f32/python/fft.py +++ /dev/null @@ -1,179 +0,0 @@ -#!/usr/bin/env python3 - -import matplotlib.pyplot as plt -import numpy as np -import os - -from scipy.fftpack import fft - - -# fixed seed for reproducibility -np.random.seed(42) - -# path to current test directory -tst_path = os.path.realpath(__file__) -tst_path = tst_path[:tst_path.find("python")] - -# script settings -en_plots = True -en_filegen = False - -# generated file path and names -fpath = os.path.join(tst_path, 'cfg', 'default') -input_fnm = "in.c" -output_fnm = "out.c" -header_fnm = "data.h" -# C array names -input_arr_nm = "input" -output_arr_nm = "output_ref" - - -def fwrite_array_f32(fname, arr_name, arr_size, arr, per_line): - """ Write array to C file """ - outdir, f = os.path.split(fname) - if not os.path.exists(outdir): - os.makedirs(outdir) - float_fmt = '{:13.10f}' # 13 digits total, 10 decimal points floating-point - sz = len(arr) - with open(fname, 'w') as f: - f.write("\n#include \"data.h\"") - f.write("\n\nfloat32_t {:}[{:}] = \n{{\n ".format(arr_name, arr_size)) - for i in range(sz): - if i == (sz - 1): - f.write("{:}f\n".format(float_fmt.format(arr[i]))) - else: - f.write("{:}f, ".format(float_fmt.format(arr[i]))) - if ((i+1) % per_line) == 0: - f.write("\n ") - f.write("};\n") - - -def fwrite_header(fname, N, ifft_flag, snr_ref, 
input_arr_nm, output_arr_nm): - outdir, f = os.path.split(fname) - if not os.path.exists(outdir): - os.makedirs(outdir) - with open(fname, 'w') as f: - f.write("\n#ifndef DATA_H\n#define DATA_H\n") - f.write("\n#include \"arm_math.h\"\n") - f.write("\n") - f.write("#define FFT_SIZE ({:d})\n".format(N)) - f.write("#define IFFT_FLAG ({:d})\n".format(ifft_flag)) - f.write("#define SNR_REF_THLD ({:d})\n".format(snr_ref)) - f.write("\n") - f.write("extern float32_t {:}[FFT_SIZE];\n".format(input_arr_nm)) - f.write("extern float32_t {:}[FFT_SIZE];\n".format(output_arr_nm)) - f.write("\n#endif // DATA_H\n") - - -def snr_32b(ref, tst): - """ Calculate the SNR in 32-bit floating-point precision. """ - energy_sig = np.float32(0) - energy_err = np.float32(0) - ref_32b = ref.astype(np.float32) - tst_32b = tst.astype(np.float32) - for ri, ti in zip(ref_32b, tst_32b): - energy_sig += np.power(ri, np.float32(2)) - energy_err += np.power(np.subtract(ri, ti), np.float32(2)) - snr = np.float32(10) * np.log10(np.divide(energy_sig, energy_err)) - return snr - - -def repack_cmsis_rfft(X, N): - nyq = (N//2) - X_re = np.real(X) - X_im = np.imag(X) - - X_dc = X_re[0] - X_nyq = X_re[nyq] - - X_cmsis = [X_dc, X_nyq] - for i in range(1, nyq): - X_cmsis += [X_re[i], X_im[i]] - - return np.array(X_cmsis) - - -""" FFT Design """ -N = 2048 # FFT size -ifft_flag = False # IFFT not currently supported by this script - - -""" Input Stimulus """ -fs = 48000 # sample frequency -n = np.arange(N) # sample indices -t = n / fs # discrete time -freq = n / (N/fs) # discrete frequency - -tone_freq_hz = [100, 4000, 8000] # input tones -tone_amp_dB = [-30, -20, -10] # input tone powers -en_noise = True # optionally include zero-mean, unit std WGN -noise_dB = -20 # noise power - -# generate and sum up the pure tones -x_pure = (np.power(10,(tone_amp_dB[0]/20))) * np.sin(2 * np.pi * tone_freq_hz[0] * t) -for i in range(1, len(tone_freq_hz)): - x_pure += (np.power(10,(tone_amp_dB[i]/20))) * np.sin(2 * np.pi * tone_freq_hz[i] * t) - -# optionally add zero-mean gaussian white noise -if en_noise: - x = x_pure + (np.power(10, (noise_dB/20)) * np.random.normal(0, 1, N)) -else: - x = x_pure - -# print(len(x), type(x)) - - -""" FFT output """ -X = fft(x=x, n=N) - - -""" Pack the FFT output according to the CMSIS spec """ -X_cmsis = repack_cmsis_rfft(X, N) - - -""" Calculate the expected SNR of 32b precision result """ -x_32b = x.astype(np.float32) -X_32b = fft(x=x_32b, n=N) -X_32b_cmsis = repack_cmsis_rfft(X_32b, N) -snr = snr_32b(X_cmsis, X_32b_cmsis) -# the cmsis implementation seems to introduce some small extra loss of precision -snr_ref = int(snr) - 1 - - -""" Plot """ -if en_plots: - nyq = (N//2) - plt.figure(figsize = (12, 6)) - plt.subplot(121) - if en_noise: - plt.plot(n, x_pure) # original signal - plt.plot(n, x) # noisy signal - plt.legend(["x (original)", "x (noisy)"]) - else: - plt.plot(n, x) - plt.legend(["x"]) - plt.xlabel('sample') - plt.ylabel('amplitude') - plt.title('Input') - - plt.subplot(122) - # spectrum is symmetric for real input, so only plot the first half - plt.stem(freq[:nyq+1], np.abs(X[:nyq+1]), 'b', markerfmt=" ", basefmt="-b") - plt.xlabel('frequency (Hz)') - plt.ylabel('|X(freq)|') - plt.title('Spectrum') - - plt.tight_layout() - plt.show() - - -""" Write to files """ -if en_filegen: - fname = os.path.join(fpath, input_fnm) - fwrite_array_f32(fname, arr_name=input_arr_nm, arr_size='FFT_SIZE', arr=x, per_line=8) - - fname = os.path.join(fpath, output_fnm) - fwrite_array_f32(fname, arr_name=output_arr_nm, 
arr_size='FFT_SIZE', arr=X_cmsis, per_line=8) - - fname = os.path.join(fpath, header_fnm) - fwrite_header(fname, N, ifft_flag, snr_ref, input_arr_nm, output_arr_nm) diff --git a/tests/rfft2048_f32/test.mk b/tests/rfft2048_f32/test.mk deleted file mode 100644 index ddea796..0000000 --- a/tests/rfft2048_f32/test.mk +++ /dev/null @@ -1,62 +0,0 @@ -############################################################## -# INIT -############################################################## - -TST_SRC := -TST_INC := -TST_DEF := -TST_OBJ := - -# may be initialized/modified elsewhere with global settings -TST_FLG += - - -############################################################## -# EXTERNAL -############################################################## - -### global options for all library users -include $(ROOT)/dsp_inc.mk -TST_INC +=$(DSP_INC) -TST_DEF +=$(DSP_DEF) - -### snr checker -TST_INC +=-I $(CMN_DIR) -TST_SRC +=$(CMN_DIR)/snr.c - - -############################################################## -# TEST -############################################################## - -ifeq ($(CFG),) - CFG =default - $(warning WARNING: CFG not set for rfft1024_f32. Defaulting to CFG=default.) -endif - -### DEFINES -TST_DEF += - -### INCLUDES -TST_INC +=-I $(TST_DIR) -TST_INC +=-I $(TST_DIR)/cfg/$(CFG) - -### FLAGS -TST_FLG +=$(TST_DEF) - -### SOURCES -TST_SRC +=$(TST_DIR)/test_main.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_rfft_fast_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_rfft_fast_init_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_cfft_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_cfft_init_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_cfft_radix8_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_bitreversal2.c -TST_SRC +=$(DSP_DIR)/source/CommonTables/CommonTables.c - -# data files -TST_SRC +=$(TST_DIR)/cfg/$(CFG)/in.c -TST_SRC +=$(TST_DIR)/cfg/$(CFG)/out.c - -### OBJECTS -TST_OBJ +=$(patsubst %.c,%.o, $(patsubst %.S,%.o,$(notdir $(TST_SRC)))) diff --git a/tests/rfft512_f32/python/fft.py b/tests/rfft512_f32/python/fft.py deleted file mode 100755 index bea959b..0000000 --- a/tests/rfft512_f32/python/fft.py +++ /dev/null @@ -1,179 +0,0 @@ -#!/usr/bin/env python3 - -import matplotlib.pyplot as plt -import numpy as np -import os - -from scipy.fftpack import fft - - -# fixed seed for reproducibility -np.random.seed(42) - -# path to current test directory -tst_path = os.path.realpath(__file__) -tst_path = tst_path[:tst_path.find("python")] - -# script settings -en_plots = True -en_filegen = True - -# generated file path and names -fpath = os.path.join(tst_path, 'cfg', 'default') -input_fnm = "in.c" -output_fnm = "out.c" -header_fnm = "data.h" -# C array names -input_arr_nm = "input" -output_arr_nm = "output_ref" - - -def fwrite_array_f32(fname, arr_name, arr_size, arr, per_line): - """ Write array to C file """ - outdir, f = os.path.split(fname) - if not os.path.exists(outdir): - os.makedirs(outdir) - float_fmt = '{:13.10f}' # 13 digits total, 10 decimal points floating-point - sz = len(arr) - with open(fname, 'w') as f: - f.write("\n#include \"data.h\"") - f.write("\n\nfloat32_t {:}[{:}] = \n{{\n ".format(arr_name, arr_size)) - for i in range(sz): - if i == (sz - 1): - f.write("{:}f\n".format(float_fmt.format(arr[i]))) - else: - f.write("{:}f, ".format(float_fmt.format(arr[i]))) - if ((i+1) % per_line) == 0: - f.write("\n ") - f.write("};\n") - - -def fwrite_header(fname, N, ifft_flag, snr_ref, input_arr_nm, output_arr_nm): - outdir, f = 
os.path.split(fname) - if not os.path.exists(outdir): - os.makedirs(outdir) - with open(fname, 'w') as f: - f.write("\n#ifndef DATA_H\n#define DATA_H\n") - f.write("\n#include \"arm_math.h\"\n") - f.write("\n") - f.write("#define FFT_SIZE ({:d})\n".format(N)) - f.write("#define IFFT_FLAG ({:d})\n".format(ifft_flag)) - f.write("#define SNR_REF_THLD ({:d})\n".format(snr_ref)) - f.write("\n") - f.write("extern float32_t {:}[FFT_SIZE];\n".format(input_arr_nm)) - f.write("extern float32_t {:}[FFT_SIZE];\n".format(output_arr_nm)) - f.write("\n#endif // DATA_H\n") - - -def snr_32b(ref, tst): - """ Calculate the SNR in 32-bit floating-point precision. """ - energy_sig = np.float32(0) - energy_err = np.float32(0) - ref_32b = ref.astype(np.float32) - tst_32b = tst.astype(np.float32) - for ri, ti in zip(ref_32b, tst_32b): - energy_sig += np.power(ri, np.float32(2)) - energy_err += np.power(np.subtract(ri, ti), np.float32(2)) - snr = np.float32(10) * np.log10(np.divide(energy_sig, energy_err)) - return snr - - -def repack_cmsis_rfft(X, N): - nyq = (N//2) - X_re = np.real(X) - X_im = np.imag(X) - - X_dc = X_re[0] - X_nyq = X_re[nyq] - - X_cmsis = [X_dc, X_nyq] - for i in range(1, nyq): - X_cmsis += [X_re[i], X_im[i]] - - return np.array(X_cmsis) - - -""" FFT Design """ -N = 512 # FFT size -ifft_flag = False # IFFT not currently supported by this script - - -""" Input Stimulus """ -fs = 48000 # sample frequency -n = np.arange(N) # sample indices -t = n / fs # discrete time -freq = n / (N/fs) # discrete frequency - -tone_freq_hz = [100, 4000, 8000] # input tones -tone_amp_dB = [-25, -20, -10] # input tone powers -en_noise = True # optionally include zero-mean, unit std WGN -noise_dB = -20 # noise power - -# generate and sum up the pure tones -x_pure = (np.power(10,(tone_amp_dB[0]/20))) * np.sin(2 * np.pi * tone_freq_hz[0] * t) -for i in range(1, len(tone_freq_hz)): - x_pure += (np.power(10,(tone_amp_dB[i]/20))) * np.sin(2 * np.pi * tone_freq_hz[i] * t) - -# optionally add zero-mean gaussian white noise -if en_noise: - x = x_pure + (np.power(10, (noise_dB/20)) * np.random.normal(0, 1, N)) -else: - x = x_pure - -# print(len(x), type(x)) - - -""" FFT output """ -X = fft(x=x, n=N) - - -""" Pack the FFT output according to the CMSIS spec """ -X_cmsis = repack_cmsis_rfft(X, N) - - -""" Calculate the expected SNR of 32b precision result """ -x_32b = x.astype(np.float32) -X_32b = fft(x=x_32b, n=N) -X_32b_cmsis = repack_cmsis_rfft(X_32b, N) -snr = snr_32b(X_cmsis, X_32b_cmsis) -# the cmsis implementation seems to introduce some small extra loss of precision -snr_ref = int(snr) - 1 - - -""" Plot """ -if en_plots: - nyq = (N//2) - plt.figure(figsize = (12, 6)) - plt.subplot(121) - if en_noise: - plt.plot(n, x_pure) # original signal - plt.plot(n, x) # noisy signal - plt.legend(["x (original)", "x (noisy)"]) - else: - plt.plot(n, x) - plt.legend(["x"]) - plt.xlabel('sample') - plt.ylabel('amplitude') - plt.title('Input') - - plt.subplot(122) - # spectrum is symmetric for real input, so only plot the first half - plt.stem(freq[:nyq+1], np.abs(X[:nyq+1]), 'b', markerfmt=" ", basefmt="-b") - plt.xlabel('frequency (Hz)') - plt.ylabel('|X(freq)|') - plt.title('Spectrum') - - plt.tight_layout() - plt.show() - - -""" Write to files """ -if en_filegen: - fname = os.path.join(fpath, input_fnm) - fwrite_array_f32(fname, arr_name=input_arr_nm, arr_size='FFT_SIZE', arr=x, per_line=8) - - fname = os.path.join(fpath, output_fnm) - fwrite_array_f32(fname, arr_name=output_arr_nm, arr_size='FFT_SIZE', arr=X_cmsis, per_line=8) - - 
fname = os.path.join(fpath, header_fnm) - fwrite_header(fname, N, ifft_flag, snr_ref, input_arr_nm, output_arr_nm) diff --git a/tests/rfft512_f32/test.mk b/tests/rfft512_f32/test.mk deleted file mode 100644 index 28ea418..0000000 --- a/tests/rfft512_f32/test.mk +++ /dev/null @@ -1,62 +0,0 @@ -############################################################## -# INIT -############################################################## - -TST_SRC := -TST_INC := -TST_DEF := -TST_OBJ := - -# may be initialized/modified elsewhere with global settings -TST_FLG += - - -############################################################## -# EXTERNAL -############################################################## - -### global options for all library users -include $(ROOT)/dsp_inc.mk -TST_INC +=$(DSP_INC) -TST_DEF +=$(DSP_DEF) - -### snr checker -TST_INC +=-I $(CMN_DIR) -TST_SRC +=$(CMN_DIR)/snr.c - - -############################################################## -# TEST -############################################################## - -ifeq ($(CFG),) - CFG =default - $(warning WARNING: CFG not set for rfft512_f32. Defaulting to CFG=default.) -endif - -### DEFINES -TST_DEF += - -### INCLUDES -TST_INC +=-I $(TST_DIR) -TST_INC +=-I $(TST_DIR)/cfg/$(CFG) - -### FLAGS -TST_FLG +=$(TST_DEF) - -### SOURCES -TST_SRC +=$(TST_DIR)/test_main.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_rfft_fast_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_rfft_fast_init_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_cfft_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_cfft_init_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_cfft_radix8_f32.c -TST_SRC +=$(DSP_DIR)/source/TransformFunctions/arm_bitreversal2.c -TST_SRC +=$(DSP_DIR)/source/CommonTables/CommonTables.c - -# data files -TST_SRC +=$(TST_DIR)/cfg/$(CFG)/in.c -TST_SRC +=$(TST_DIR)/cfg/$(CFG)/out.c - -### OBJECTS -TST_OBJ +=$(patsubst %.c,%.o, $(patsubst %.S,%.o,$(notdir $(TST_SRC))))