diff --git a/.gitignore b/.gitignore
index de3b70f26..8ba775916 100644
--- a/.gitignore
+++ b/.gitignore
@@ -69,3 +69,6 @@ conanprofile
**/temp/
**/node_modules/
+
+
+/test*
diff --git a/.run/silo --api.run.xml b/.run/silo --api.run.xml
deleted file mode 100644
index 734ce51dd..000000000
--- a/.run/silo --api.run.xml
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.run/silo --preprocessing.run.xml b/.run/silo --preprocessing.run.xml
deleted file mode 100644
index 68b55e50d..000000000
--- a/.run/silo --preprocessing.run.xml
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.run/silo api.run.xml b/.run/silo api.run.xml
new file mode 100644
index 000000000..c4be1a657
--- /dev/null
+++ b/.run/silo api.run.xml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.run/silo preprocessing.run.xml b/.run/silo preprocessing.run.xml
new file mode 100644
index 000000000..673abf181
--- /dev/null
+++ b/.run/silo preprocessing.run.xml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 963255ef2..44f4379c5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -65,7 +65,7 @@ file(GLOB_RECURSE SRC_SILO "src/*.cpp")
list(REMOVE_ITEM SRC_SILO ${SRC_TEST})
set(SRC_SILO_WITHOUT_MAIN ${SRC_SILO})
-list(REMOVE_ITEM SRC_SILO_WITHOUT_MAIN "${CMAKE_SOURCE_DIR}/src/silo_api/api.cpp")
+list(REMOVE_ITEM SRC_SILO_WITHOUT_MAIN "${CMAKE_SOURCE_DIR}/src/main.cpp")
# ---------------------------------------------------------------------------
# Linter
@@ -105,9 +105,9 @@ target_link_libraries(
re2::re2
)
-add_executable(siloApi "${CMAKE_SOURCE_DIR}/src/silo_api/api.cpp" $)
+add_executable(silo "${CMAKE_SOURCE_DIR}/src/main.cpp" $)
target_link_libraries(
- siloApi
+ silo
PUBLIC
silolib
)
diff --git a/Dockerfile b/Dockerfile
index a2d904b35..4f2ba657d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -7,7 +7,7 @@ COPY . ./
RUN \
python3 ./build_with_conan.py --release --parallel 4\
&& cp build/Release/silo_test . \
- && cp build/Release/siloApi .
+ && cp --no-dereference build/Release/silo build/Release/siloServer build/Release/siloPreprocessor .
FROM ubuntu:22.04 AS server
@@ -15,7 +15,7 @@ FROM ubuntu:22.04 AS server
WORKDIR /app
COPY docker_default_preprocessing_config.yaml ./default_preprocessing_config.yaml
COPY docker_runtime_config.yaml ./runtime_config.yaml
-COPY --from=builder /src/siloApi ./
+COPY --from=builder /src/silo /src/siloServer /src/siloPreprocessor ./
RUN apt update && apt dist-upgrade -y \
&& apt install -y libtbb12 curl jq
@@ -25,7 +25,7 @@ HEALTHCHECK --start-period=20s CMD curl --fail --silent localhost:8081/info | jq
EXPOSE 8081
-ENTRYPOINT ["./siloApi"]
+ENTRYPOINT ["./silo"]
LABEL org.opencontainers.image.source="https://github.com/GenSpectrum/LAPIS-SILO"
LABEL org.opencontainers.image.description="Sequence Indexing engine for Large Order of genomic data"
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..4fe83295c
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,61 @@
+# Note: you can set the COLOR environment variable to 1 to get the log
+# files produced with color codes.
+
+export PATH := bin:$(PATH)
+
+all: build/siloServer build/siloPreprocessor build/silo_test
+
+clean: # remove object files and every binary built here (siloApi is the pre-rename name)
+	find build -name "*.o" -print0 | xargs -0 rm -f
+	rm -f build/silo build/siloServer build/siloPreprocessor build/siloApi build/silo_test
+
+# Have separate targets for the binaries, but share a single build
+# (faster to build them both?); use `run-cached` to only actually run one
+# build, though.
+
+build/siloServer: build/silo
+
+build/siloPreprocessor: build/silo
+
+build/silo: $(shell bin/cplusplus-source-files)
+ run-cached build/.exit-code build $@
+
+build/silo_test: $(shell bin/cplusplus-source-files)
+ run-cached build/.exit-code build $@
+
+
+# Tests produce log files if successful (if not successful, the log
+# file can be found with .tmp appended, but is also printed to stdout).
+
+build/unit-tests.log: build/silo_test
+ run-with-log $@ build/silo_test
+
+build/preprocessing-tsv.log: \
+ build/siloPreprocessor \
+ $(shell bin/preprocessing-input-files-in testBaseData/exampleDataset)
+ run-with-log $@ preprocessing-in testBaseData/exampleDataset
+
+build/preprocessing-ndjson.log: \
+ build/siloPreprocessor \
+ $(shell bin/preprocessing-input-files-in testBaseData/exampleDatasetAsNdjson)
+ run-with-log $@ preprocessing-in testBaseData/exampleDatasetAsNdjson
+
+build/tsv-tests.log: build/siloServer build/preprocessing-tsv.log $(shell bin/test-query-files)
+ run-with-log $@ runtests-e2e testBaseData/exampleDataset 7001
+
+build/ndjson-tests.log: build/siloServer build/preprocessing-ndjson.log $(shell bin/test-query-files)
+ run-with-log $@ runtests-e2e testBaseData/exampleDatasetAsNdjson 7002
+
+test: build/unit-tests.log build/tsv-tests.log build/ndjson-tests.log
+
+
+# Manually run the api so that it can be queried interactively.
+
+runapi-tsv: build/siloServer build/preprocessing-tsv.log
+ runapi-in testBaseData/exampleDataset 8081
+
+runapi-ndjson: build/siloServer build/preprocessing-ndjson.log # needs the ndjson (not tsv) preprocessing run
+	runapi-in testBaseData/exampleDatasetAsNdjson 8081
+
+
+.PHONY: all clean test runapi-tsv runapi-ndjson
diff --git a/bin/build b/bin/build
new file mode 100755
index 000000000..ab8ba50ce
--- /dev/null
+++ b/bin/build
@@ -0,0 +1,52 @@
+#!/bin/bash
+set -meuo pipefail
+IFS=
+
+usage() {
+ echo "usage: $0 [...]"
+ echo " Auto-detect the mold linker and ninja builder and"
+ echo " call cmake accordingly. Requires up to date configuration"
+ echo " via conan first. Currently only fully runs on Linux and"
+ echo " Mac OS, detection for core count on Windows is missing."
+ echo " Arguments are passed on to make/ninja."
+ exit 1
+}
+
+if [[ $# -gt 0 ]] && { [[ "$1" = "-h" ]] || [[ "$1" = "--help" ]] ; }; then
+ usage
+fi
+
+args=(../)
+
+if command -v mold >/dev/null 2>&1; then
+    args+=(-D CMAKE_EXE_LINKER_FLAGS=-fuse-ld=mold)
+fi
+# Prefer ninja when it is on PATH; otherwise use make with one job per core.
+if command -v ninja >/dev/null 2>&1; then
+    args+=(-G Ninja)
+    make=(ninja)
+else
+    # Linux: /proc/cpuinfo
+    # Mac OS: sysctl -n hw.ncpu (grep's error about the missing file is silenced)
+    corecount=$(grep -c ^processor /proc/cpuinfo 2>/dev/null || sysctl -n hw.ncpu)
+    make=(make "-j$corecount")
+fi
+
+_do() {
+ echo "+" "$@"
+ "$@"
+}
+
+_do cd build
+_do cmake "${args[@]}"
+# Make all binaries..
+_do "${make[@]}"
+
+# ..but touch the desired ones afterwards for the top-level Makefile's
+# sake (XX sigh, alternatives?)
+#if [ $# -gt 0 ]; then
+# cd ..
+# touch "$@"
+#fi
+# --- nah, just let it re-run build, sadly, still better than re-running tests unnecessarily.
+
diff --git a/bin/cplusplus-source-files b/bin/cplusplus-source-files
new file mode 100755
index 000000000..f599056eb
--- /dev/null
+++ b/bin/cplusplus-source-files
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -meuo pipefail
+IFS=
+
+find src/ include/ -name "*.h" -o -name "*.cpp" | grep -v '#'
+
diff --git a/bin/gco-make-test b/bin/gco-make-test
new file mode 100755
index 000000000..da5c7773f
--- /dev/null
+++ b/bin/gco-make-test
@@ -0,0 +1,11 @@
+#!/bin/bash
+set -emuo pipefail
+IFS=
+
+set -x
+
+while read -r commit; do # one commit id per line on stdin
+    git checkout "$commit" </dev/null
+    make -j16 test </dev/null # keep child processes from consuming the commit list
+done
+
diff --git a/bin/preprocessing-in b/bin/preprocessing-in
new file mode 100755
index 000000000..780bbadb9
--- /dev/null
+++ b/bin/preprocessing-in
@@ -0,0 +1,33 @@
+#!/bin/bash
+set -meuo pipefail
+IFS=
+
+usage() {
+ echo "$0 test-input-dir" >&2
+ false
+}
+
+if [[ $# != 1 ]]; then
+ usage
+fi
+if [[ "$1" == "-h" ]] || [[ "$1" == "--help" ]]; then
+ usage
+fi
+testdir="$1"
+mypath=$(readlink -f "$0")
+mydir=$(dirname "$mypath")
+builddir=$(dirname "$mydir")/build
+
+IFS=' '
+
+set +x
+
+rm -rf temp
+
+cd "$testdir"
+
+rm -rf {output,temp,test?*}
+
+
+#SPDLOG_LEVEL=debug
+${DEBUGGER-} "$builddir"/siloPreprocessor
diff --git a/bin/preprocessing-input-files-in b/bin/preprocessing-input-files-in
new file mode 100755
index 000000000..f14b06554
--- /dev/null
+++ b/bin/preprocessing-input-files-in
@@ -0,0 +1,7 @@
+#!/bin/bash
+set -meuo pipefail
+IFS=
+
+# preprocessing inputs
+bin/verified-find "$1" -maxdepth 1 -a \( -name "*.fasta" -o -name "*.yaml" -o -name "*.xz" \
+ -o -name "*.zst" -o -name "*.json" -o -name "*.tsv" -o -name "*.ndjson" \)
diff --git a/bin/run-cached b/bin/run-cached
new file mode 100755
index 000000000..3bdb623b2
--- /dev/null
+++ b/bin/run-cached
@@ -0,0 +1,80 @@
+#!/bin/bash
+set -meuo pipefail
+IFS=
+
+usage() {
+ echo "usage: $0 [-v|--verbose] file-for-exitcode cmd [args...]"
+ echo " Takes a lock on file-for-exitcode, if it gets the lock,"
+ echo " runs cmd with args and stores the exit code in file-for-exitcode."
+ echo " If the file is already locked, waits until unlocked then reads"
+ echo " the file contents as the exit code."
+ echo " Exits with the exitcode in either case."
+ echo " If -v/--verbose is given, prints to stderr when it's waiting for"
+ echo " another instance."
+ exit 1
+}
+
+if [[ $# -lt 1 ]]; then
+ usage
+fi
+
+if [[ "$1" = -v ]] || [[ "$1" = --verbose ]]; then
+ verbose=1
+ shift
+else
+ verbose=0
+fi
+
+if [[ $# -lt 2 ]]; then
+ usage
+fi
+
+cachepath="$1"
+shift
+
+info() {
+ if [[ "$verbose" = 1 ]]; then
+ echo "$@" >&2
+ fi
+}
+
+
+# Open the cache file read/write to an unused file descriptor.
+exec {fd}<>"$cachepath"
+
+if flock -E77 --nonblock "$fd"; then
+ # We have an exclusive lock, so we are supposed to run the cmd;
+ # don't fail if it doesn't exit successfully.
+
+ # Remove previously stored code. (Have to reopen, `>&"$fd"` does
+ # not truncate.)
+ true > "$cachepath"
+
+ if "$@"; then
+ code=$?
+ else
+ code=$?
+ fi
+ echo "$code" >&"$fd"
+ exit "$code"
+else
+ e=$?
+ if [[ "$e" == 77 ]]; then
+ # Another instance is running the command already; wait it out
+ # then read out the exit code.
+ info -n "$0 $cachepath: waiting for concurrent run to finish..."
+ flock "$fd"
+ # `read` will fail if the file is empty (possible if the other
+ # $0-instance was killed)
+ if read -r code <&"$fd"; then
+ info "exited with code $code"
+ exit "$code"
+ else
+ info "other $0 instance was killed"
+ exit 130
+ fi
+ else
+ echo "$0: got exit code $e, is the 'flock' command not available?" >&2
+ false
+ fi
+fi
diff --git a/bin/run-with-log b/bin/run-with-log
new file mode 100755
index 000000000..968cb3752
--- /dev/null
+++ b/bin/run-with-log
@@ -0,0 +1,43 @@
+#!/bin/bash
+set -meuo pipefail
+IFS=
+
+usage() { # print the help text and exit with status 1
+    echo "$0 logfile cmd [args...]"
+    echo " Renames logfile to logfile.old, then runs cmd with args,"
+    echo " redirecting its stdout/stderr to logfile.tmp, when successful"
+    echo " renames logfile.tmp to logfile."
+    echo " If not successful, prints logfile.tmp to stdout and fails."
+    echo
+    echo " If the COLOR env variable is true, the cmd will not see the "
+    echo " pipe and hence will colorize as if running under a terminal."
+    echo " This requires the 'unbuffer' tool from the 'expect' package "
+    echo " to be installed."
+    exit 1
+}
+
+if [[ $# -lt 2 ]]; then
+ usage
+fi
+
+logfile="$1"
+shift
+
+if [ -e "$logfile" ]; then
+ mv "$logfile" "$logfile".old
+fi
+
+_run() {
+ if [[ "${COLOR-0}" = 0 ]]; then
+ "$@"
+ else
+ unbuffer "$@"
+ fi
+}
+
+if _run "$@" > "$logfile".tmp 2>&1; then
+ mv "$logfile".tmp "$logfile"
+else
+ P="$logfile".tmp perl -wne 's/^/$ENV{P}\t/; print' < "$logfile".tmp
+ false
+fi
diff --git a/bin/runapi-in b/bin/runapi-in
new file mode 100755
index 000000000..1b8f91e10
--- /dev/null
+++ b/bin/runapi-in
@@ -0,0 +1,23 @@
+#!/bin/bash
+set -meuo pipefail
+IFS=
+
+usage() {
+ echo "$0 test-input-dir port-number" >&2
+ false
+}
+
+if [[ $# != 2 ]]; then
+ usage
+fi
+testdir="$1"
+portnumber="$2"
+
+IFS=' '
+
+set -x
+
+cd "$testdir"
+
+export SPDLOG_LEVEL=${SPDLOG_LEVEL-debug}
+exec ${DEBUGGER-} ../../build/siloServer --api-port "$portnumber"
diff --git a/bin/runtests-e2e b/bin/runtests-e2e
new file mode 100755
index 000000000..78d49efdd
--- /dev/null
+++ b/bin/runtests-e2e
@@ -0,0 +1,48 @@
+#!/bin/bash
+set -meuo pipefail
+IFS=
+
+#set -x
+
+usage() {
+ echo "$0 test-input-dir port-number" >&2
+ false
+}
+
+if [[ $# != 2 ]]; then
+ usage
+fi
+testdir="$1"
+portnumber="$2"
+
+
+runapi-in "$testdir" "$portnumber" &
+apipid=$(jobs -p)
+
+cleanup () {
+ kill -9 "$apipid" || true
+}
+trap cleanup EXIT
+
+# Wait until the API is ready
+tries=300
+while true; do
+ if res=$(curl --silent --fail-early --data '{}' http://localhost:"$portnumber"/query); then
+ if echo " $res"| grep -q "Database not initialized yet"; then
+ true # continue
+ else
+ break
+ fi
+ fi
+ tries=$(( tries - 1 ))
+ if [[ $tries < 1 ]]; then
+ echo "Timeout waiting for the database to be ready."
+ exit 1
+ fi
+ sleep 1
+done
+
+# Run the tests
+SILO_URL=127.0.0.1:"$portnumber" node --test --test-reporter=tap
+
+# Bash afterwards kills the API process via `cleanup`.
diff --git a/bin/test-query-files b/bin/test-query-files
new file mode 100755
index 000000000..c053bb75e
--- /dev/null
+++ b/bin/test-query-files
@@ -0,0 +1,7 @@
+#!/bin/bash
+set -meuo pipefail
+IFS=
+
+# query inputs
+bin/verified-find endToEndTests/test/ -name "*.json"
+
diff --git a/bin/verified-find b/bin/verified-find
new file mode 100755
index 000000000..d940e292b
--- /dev/null
+++ b/bin/verified-find
@@ -0,0 +1,45 @@
+#!/bin/bash
+set -meuo pipefail
+IFS=
+
+usage() {
+ echo "usage: $0 dir [more find arguments ...]"
+ echo " Runs find with the given dir and arguments,"
+ echo " but if it can run git ls-files on the same dir,"
+ echo " verifies if both give the same output and warns"
+ echo " if not."
+ false
+}
+
+if [[ $# -lt 1 ]]; then
+ usage
+fi
+if [[ "$1" = -h ]] || [[ "$1" = --help ]]; then
+ usage
+fi
+
+tmp1=$(mktemp)
+tmp2=$(mktemp)
+tmp3=$(mktemp)
+
+find "$@" | LANG=C sort > "$tmp1"
+cat "$tmp1"
+
+# oh, have to exclude the .js files in the endToEndTests/test/
+# case; there are TWO subdirectories with test files in
+# testBaseData, but who knows if that changes in the future?
+if git ls-files "$1" | grep -v '\.js$' | LANG=C sort > "$tmp2" 2>/dev/null; then
+ if [ -s "$tmp2" ]; then
+ if diff -u "$tmp1" "$tmp2" > "$tmp3"; then
+ true
+ else
+ {
+ echo "Warning: $0: '$1' yielded different results for find vs. git:"
+ cat "$tmp3"
+ } >&2
+ # but continue running.
+ fi
+ fi
+fi
+
+rm -f "$tmp1" "$tmp2" "$tmp3"
diff --git a/doc/config.md b/doc/config.md
new file mode 100644
index 000000000..a64afc637
--- /dev/null
+++ b/doc/config.md
@@ -0,0 +1,53 @@
+# How the configuration system works
+
+SILO takes configuration information from 3 configuration sources:
+YAML files, environment variables, and command line arguments. The
+same variables can be defined via any of them. (The path to the
+first-level configuration file can technically be set in that file
+itself, but only a value passed via environment variable or command
+line is useful, of course.) Environment variables override YAML file
+entries, and command line arguments override both.
+
+The system works off metadata on the structs making up the
+configuration data.
+
+The metadata is converted at runtime (via
+[`ConfigStruct`](../include/config/config_specification.h)) to a flat
+representation, a vector of tuples of
+[`ConfigKeyPath`](../include/config/config_key_path.h) (list of key segment strings) and
+reference to [`ConfigValue`](../include/config/config_specification.h) (the metadata on a
+struct field). This vector is the basis to build the help text,
+or to map to vectors or key/value representations for the source
+in question.
+
+Each source ([command line arguments](XX), [environment variables](XX),
+[yaml file](XX)) has its individual constructor and error handling
+during construction. The resulting object must implement
+[`VerifyConfigSource`](../include/config/config_specification.h), the `verify` method of
+which takes the config values vector mentioned in the previous
+paragraph, and returns an object that implements
+[`VerifiedConfigSource`](../include/config/config_backend.h). This is then, inside
+[`raw_get_config`](XX), passed to the
+[`OverwriteFrom::overwrite_from`](XX?) method to
+fill the fields of the to-be configured struct with the values
+destined for them.
+
+To make this work, each configurable struct needs to implement
+[`OverwriteFrom`](XX?), additionally, the top-level
+configurable struct needs to implement
+[`ToplevelConfig`](XX). To provide that latter
+implementation, the top-level config struct should have a boolean
+help field, and a field to take a path to the config file that
+should be read, if given.
+
+The process of going through the 3 sources, and reading the config
+file that was specified by the user, is handled by the
+aforementioned `raw_get_config` function. All this
+function needs is a reference to the (remaining) command line
+arguments to be parsed, and a reference to the struct metadata for
+the toplevel configuration struct. It returns the filled-in
+struct, of the given type parameter which must match the metadata
+that was given.
+
+For more information (with quite some overlap with this description),
+see [`config_source_interface`](../include/config/config_backend.h).
diff --git a/endToEndTests/test/queries/fasta_allTestSequences.json b/endToEndTests/test/queries/fasta_allTestSequences.json
deleted file mode 100644
index 17e807369..000000000
--- a/endToEndTests/test/queries/fasta_allTestSequences.json
+++ /dev/null
@@ -1,118 +0,0 @@
-{
- "testCaseName": "Get the unaligned fasta for all test sequences",
- "query": {
- "action": {
- "type": "Fasta",
- "sequenceName": "testSecondSequence",
- "orderByFields": ["gisaid_epi_isl"]
- },
- "filterExpression": {
- "type": "True"
- }
- },
- "expectedQueryResult": [
- { "gisaid_epi_isl": "EPI_ISL_1001493", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1001920", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1002052", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1002156", "testSecondSequence": "ACGN" },
- { "gisaid_epi_isl": "EPI_ISL_1003010", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1003036", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1003373", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1003425", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1003519", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1003629", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1003849", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1004495", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1005148", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1036103", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1080536", "testSecondSequence": "ATGT" },
- { "gisaid_epi_isl": "EPI_ISL_1119315", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1119584", "testSecondSequence": "ACGN" },
- { "gisaid_epi_isl": "EPI_ISL_1129663", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1130868", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1131102", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1195052", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1260480", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1273458", "testSecondSequence": "ANGT" },
- { "gisaid_epi_isl": "EPI_ISL_1273715", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1360935", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1361468", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1407962", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1408062", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1408408", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1408805", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1597890", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1597932", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1599113", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1682849", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1747752", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1747885", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1748215", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1748243", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1748395", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1749892", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1749899", "testSecondSequence": "AAGN" },
- { "gisaid_epi_isl": "EPI_ISL_1749960", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1750503", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1750868", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1760534", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_1840634", "testSecondSequence": "ACGN" },
- { "gisaid_epi_isl": "EPI_ISL_2016901", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2017036", "testSecondSequence": "ANGT" },
- { "gisaid_epi_isl": "EPI_ISL_2019235", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2019350", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2086867", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2180023", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2180995", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2181005", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2213804", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2213934", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2213984", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2214128", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2270139", "testSecondSequence": null },
- { "gisaid_epi_isl": "EPI_ISL_2307766", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2307888", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2308054", "testSecondSequence": null },
- { "gisaid_epi_isl": "EPI_ISL_2359636", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2360326", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2367431", "testSecondSequence": "NCGT" },
- { "gisaid_epi_isl": "EPI_ISL_2374969", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2375097", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2375165", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2375247", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2375490", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2379651", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2405276", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2408472", "testSecondSequence": "AAGT" },
- { "gisaid_epi_isl": "EPI_ISL_2544226", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2544332", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2544452", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_2574088", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_3016465", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_3086369", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_3128737", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_3128796", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_3128811", "testSecondSequence": "ACGTACGT" },
- { "gisaid_epi_isl": "EPI_ISL_3247294", "testSecondSequence": null },
- {
- "gisaid_epi_isl": "EPI_ISL_3259931",
- "testSecondSequence": "JRZFHVKQIQGIVPUNJZCDKLOPDFTWZWXEXKZIHLGFWZNIGUAAPJBXPQCJBFUYHHIOPNDMTMHAFPHMZRCNUGIBRZCNKAJZMWXMBMPQRTZQUHTIFSOBXAQWMESDRWVJQWRE"
- },
- { "gisaid_epi_isl": "EPI_ISL_3267832", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_3465556", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_3465732", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_3578231", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_466942", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_581968", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_721941", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_737604", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_737715", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_737860", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_768148", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_830864", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_899725", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_899762", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_931031", "testSecondSequence": "ACGT" },
- { "gisaid_epi_isl": "EPI_ISL_931279", "testSecondSequence": "ACGT" }
- ]
-}
diff --git a/endToEndTests/test/queries/nOf_2of3_details.json b/endToEndTests/test/queries/nOf_2of3_details.json
deleted file mode 100644
index 3e7bcd5cc..000000000
--- a/endToEndTests/test/queries/nOf_2of3_details.json
+++ /dev/null
@@ -1,83 +0,0 @@
-{
- "testCaseName": "N-Of query requesting 2 of 3 mutations with details action",
- "query": {
- "action": {
- "type": "Details",
- "randomize": {
- "seed": 1232
- }
- },
- "filterExpression": {
- "type": "N-Of",
- "numberOfMatchers": 2,
- "matchExactly": false,
- "children": [
- {
- "type": "NucleotideEquals",
- "position": 1,
- "symbol": "-"
- },
- {
- "type": "NucleotideEquals",
- "position": 2,
- "symbol": "T"
- },
- {
- "type": "NucleotideEquals",
- "position": 27542,
- "symbol": "N"
- }
- ]
- }
- },
- "expectedQueryResult": [
- {
- "age": 58,
- "country": "Switzerland",
- "date": "2021-04-28",
- "division": "Basel-Stadt",
- "gisaid_epi_isl": "EPI_ISL_2019235",
- "pango_lineage": "B.1.1.7",
- "qc_value": 0.9,
- "region": "Europe",
- "test_boolean_column": false,
- "unsorted_date": "2021-01-22"
- },
- {
- "age": 50,
- "country": "Switzerland",
- "date": "2020-11-13",
- "division": "Solothurn",
- "gisaid_epi_isl": "EPI_ISL_1005148",
- "pango_lineage": "B.1.221",
- "qc_value": 0.92,
- "region": "Europe",
- "test_boolean_column": null,
- "unsorted_date": "2020-12-17"
- },
- {
- "age": 50,
- "country": "Switzerland",
- "date": "2021-02-23",
- "division": "Solothurn",
- "gisaid_epi_isl": "EPI_ISL_1195052",
- "pango_lineage": "B.1.1.7",
- "qc_value": 0.95,
- "region": "Europe",
- "test_boolean_column": null,
- "unsorted_date": "2021-07-04"
- },
- {
- "age": 54,
- "country": "Switzerland",
- "date": "2021-03-19",
- "division": "Solothurn",
- "gisaid_epi_isl": "EPI_ISL_1597932",
- "pango_lineage": "B.1.1.7",
- "qc_value": 0.94,
- "region": "Europe",
- "test_boolean_column": true,
- "unsorted_date": "2021-02-10"
- }
- ]
-}
diff --git a/include/config/backend/command_line_arguments.h b/include/config/backend/command_line_arguments.h
new file mode 100644
index 000000000..3a0dc8727
--- /dev/null
+++ b/include/config/backend/command_line_arguments.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include