diff --git a/.gitignore b/.gitignore index de3b70f26..8ba775916 100644 --- a/.gitignore +++ b/.gitignore @@ -69,3 +69,6 @@ conanprofile **/temp/ **/node_modules/ + + +/test* diff --git a/.run/silo --api.run.xml b/.run/silo --api.run.xml deleted file mode 100644 index 734ce51dd..000000000 --- a/.run/silo --api.run.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/.run/silo --preprocessing.run.xml b/.run/silo --preprocessing.run.xml deleted file mode 100644 index 68b55e50d..000000000 --- a/.run/silo --preprocessing.run.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/.run/silo api.run.xml b/.run/silo api.run.xml new file mode 100644 index 000000000..c4be1a657 --- /dev/null +++ b/.run/silo api.run.xml @@ -0,0 +1,10 @@ + + + + + + + + + \ No newline at end of file diff --git a/.run/silo preprocessing.run.xml b/.run/silo preprocessing.run.xml new file mode 100644 index 000000000..673abf181 --- /dev/null +++ b/.run/silo preprocessing.run.xml @@ -0,0 +1,10 @@ + + + + + + + + + \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 963255ef2..44f4379c5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,7 +65,7 @@ file(GLOB_RECURSE SRC_SILO "src/*.cpp") list(REMOVE_ITEM SRC_SILO ${SRC_TEST}) set(SRC_SILO_WITHOUT_MAIN ${SRC_SILO}) -list(REMOVE_ITEM SRC_SILO_WITHOUT_MAIN "${CMAKE_SOURCE_DIR}/src/silo_api/api.cpp") +list(REMOVE_ITEM SRC_SILO_WITHOUT_MAIN "${CMAKE_SOURCE_DIR}/src/main.cpp") # --------------------------------------------------------------------------- # Linter @@ -105,9 +105,9 @@ target_link_libraries( re2::re2 ) -add_executable(siloApi "${CMAKE_SOURCE_DIR}/src/silo_api/api.cpp" $) +add_executable(silo "${CMAKE_SOURCE_DIR}/src/main.cpp" $) target_link_libraries( - siloApi + silo PUBLIC silolib ) diff --git a/Dockerfile b/Dockerfile index a2d904b35..4f2ba657d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ COPY . 
./ RUN \ python3 ./build_with_conan.py --release --parallel 4\ && cp build/Release/silo_test . \ - && cp build/Release/siloApi . + && cp --no-dereference build/Release/silo build/Release/siloServer build/Release/siloPreprocessor . FROM ubuntu:22.04 AS server @@ -15,7 +15,7 @@ FROM ubuntu:22.04 AS server WORKDIR /app COPY docker_default_preprocessing_config.yaml ./default_preprocessing_config.yaml COPY docker_runtime_config.yaml ./runtime_config.yaml -COPY --from=builder /src/siloApi ./ +COPY --from=builder /src/silo /src/siloServer /src/siloPreprocessor ./ RUN apt update && apt dist-upgrade -y \ && apt install -y libtbb12 curl jq @@ -25,7 +25,7 @@ HEALTHCHECK --start-period=20s CMD curl --fail --silent localhost:8081/info | jq EXPOSE 8081 -ENTRYPOINT ["./siloApi"] +ENTRYPOINT ["./silo"] LABEL org.opencontainers.image.source="https://github.com/GenSpectrum/LAPIS-SILO" LABEL org.opencontainers.image.description="Sequence Indexing engine for Large Order of genomic data" diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..4fe83295c --- /dev/null +++ b/Makefile @@ -0,0 +1,62 @@ +# Note: you can set the COLOR environment variable to 1 to get the log +# files produced with color codes. + +export PATH := bin:$(PATH) + +all: build/siloServer build/siloPreprocessor build/silo_test + +# Remove object files and all built binaries (siloApi is the pre-rename name of silo). +clean: + find build -name "*.o" -print0 | xargs -0 rm -f + rm -f build/silo build/siloServer build/siloPreprocessor build/siloApi build/silo_test + +# Have separate targets for the binaries, but share a single build +# (faster to build them both?); use `run-cached` to only actually run one +# build, though. + +build/siloServer: build/silo + +build/siloPreprocessor: build/silo + +build/silo: $(shell bin/cplusplus-source-files) + run-cached build/.exit-code build $@ + +build/silo_test: $(shell bin/cplusplus-source-files) + run-cached build/.exit-code build $@ + + +# Tests produce log files if successful (if not successful, the log +# file can be found with .tmp appended, but is also printed to stdout). 
+ +build/unit-tests.log: build/silo_test + run-with-log $@ build/silo_test + +build/preprocessing-tsv.log: \ + build/siloPreprocessor \ + $(shell bin/preprocessing-input-files-in testBaseData/exampleDataset) + run-with-log $@ preprocessing-in testBaseData/exampleDataset + +build/preprocessing-ndjson.log: \ + build/siloPreprocessor \ + $(shell bin/preprocessing-input-files-in testBaseData/exampleDatasetAsNdjson) + run-with-log $@ preprocessing-in testBaseData/exampleDatasetAsNdjson + +build/tsv-tests.log: build/siloServer build/preprocessing-tsv.log $(shell bin/test-query-files) + run-with-log $@ runtests-e2e testBaseData/exampleDataset 7001 + +build/ndjson-tests.log: build/siloServer build/preprocessing-ndjson.log $(shell bin/test-query-files) + run-with-log $@ runtests-e2e testBaseData/exampleDatasetAsNdjson 7002 + +test: build/unit-tests.log build/tsv-tests.log build/ndjson-tests.log + + +# Manually run the api so that it can be queried interactively; each target depends on the preprocessing log of its own dataset. + +runapi-tsv: build/siloServer build/preprocessing-tsv.log + runapi-in testBaseData/exampleDataset 8081 + +runapi-ndjson: build/siloServer build/preprocessing-ndjson.log + runapi-in testBaseData/exampleDatasetAsNdjson 8081 + + +.PHONY: all clean test runapi-tsv runapi-ndjson diff --git a/bin/build b/bin/build new file mode 100755 index 000000000..ab8ba50ce --- /dev/null +++ b/bin/build @@ -0,0 +1,52 @@ +#!/bin/bash +set -meuo pipefail +IFS= + +usage() { + echo "usage: $0 [...]" + echo " Auto-detect the mold linker and ninja builder and" + echo " call cmake accordingly. Requires up to date configuration" + echo " via conan first. Currently only fully runs on Linux and" + echo " Mac OS, detection for core count on Windows is missing." + echo " Arguments are passed on to make/ninja." 
+ exit 1 +} + +if [[ $# -gt 0 ]] && { [[ "$1" = "-h" ]] || [[ "$1" = "--help" ]] ; }; then + usage +fi + +args=(../) + +if _mold_path=$(which mold); then + args=("${args[@]}" -D CMAKE_EXE_LINKER_FLAGS=-fuse-ld=mold) +fi + +if _ninja_path=$(which ninja); then + args=("${args[@]}" -G Ninja) + make=(ninja) +else + # Linux: /proc/cpuinfo + # Mac OS: sysctl -n hw.ncpu + corecount=$(grep -c ^processor /proc/cpuinfo || sysctl -n hw.ncpu) + make=(make "-j$corecount") +fi + +_do() { + echo "+" "$@" + "$@" +} + +_do cd build +_do cmake "${args[@]}" +# Make all binaries.. +_do "${make[@]}" + +# ..but touch the desired ones afterwards for the top-level Makefile's +# sake (XX sigh, alternatives?) +#if [ $# -gt 0 ]; then +# cd .. +# touch "$@" +#fi +# --- nah, just let it re-run build, sadly, still better than re-running tests unnecessarily. + diff --git a/bin/cplusplus-source-files b/bin/cplusplus-source-files new file mode 100755 index 000000000..f599056eb --- /dev/null +++ b/bin/cplusplus-source-files @@ -0,0 +1,6 @@ +#!/bin/bash +set -meuo pipefail +IFS= + +find src/ include/ -name "*.h" -o -name "*.cpp" | grep -v '#' + diff --git a/bin/gco-make-test b/bin/gco-make-test new file mode 100755 index 000000000..da5c7773f --- /dev/null +++ b/bin/gco-make-test @@ -0,0 +1,11 @@ +#!/bin/bash +set -emuo pipefail +IFS= + +set -x + +while read -r commit; do + git checkout "$commit" + make -j16 test +done + diff --git a/bin/preprocessing-in b/bin/preprocessing-in new file mode 100755 index 000000000..780bbadb9 --- /dev/null +++ b/bin/preprocessing-in @@ -0,0 +1,33 @@ +#!/bin/bash +set -meuo pipefail +IFS= + +usage() { + echo "$0 test-input-dir" >&2 + false +} + +if [[ $# != 1 ]]; then + usage +fi +if [[ "$1" == "-h" ]] || [[ "$1" == "--help" ]]; then + usage +fi +testdir="$1" +mypath=$(readlink -f "$0") +mydir=$(dirname "$mypath") +builddir=$(dirname "$mydir")/build + +IFS=' ' + +set +x + +rm -rf temp + +cd "$testdir" + +rm -rf {output,temp,test?*} + + +#SPDLOG_LEVEL=debug +${DEBUGGER-} 
"$builddir"/siloPreprocessor diff --git a/bin/preprocessing-input-files-in b/bin/preprocessing-input-files-in new file mode 100755 index 000000000..f14b06554 --- /dev/null +++ b/bin/preprocessing-input-files-in @@ -0,0 +1,7 @@ +#!/bin/bash +set -meuo pipefail +IFS= + +# preprocessing inputs +bin/verified-find "$1" -maxdepth 1 -a \( -name "*.fasta" -o -name "*.yaml" -o -name "*.xz" \ + -o -name "*.zst" -o -name "*.json" -o -name "*.tsv" -o -name "*.ndjson" \) diff --git a/bin/run-cached b/bin/run-cached new file mode 100755 index 000000000..3bdb623b2 --- /dev/null +++ b/bin/run-cached @@ -0,0 +1,80 @@ +#!/bin/bash +set -meuo pipefail +IFS= + +usage() { + echo "usage: $0 [-v|--verbose] file-for-exitcode cmd [args...]" + echo " Takes a lock on file-for-exitcode, if it gets the lock," + echo " runs cmd with args and stores the exit code in file-for-exitcode." + echo " If the file is already locked, waits until unlocked then reads" + echo " the file contents as the exit code." + echo " Exits with the exitcode in either case." + echo " If -v/--verbose is given, prints to stderr when it's waiting for" + echo " another instance." + exit 1 +} + +if [[ $# -lt 1 ]]; then + usage +fi + +if [[ "$1" = -v ]] || [[ "$1" = --verbose ]]; then + verbose=1 + shift +else + verbose=0 +fi + +if [[ $# -lt 2 ]]; then + usage +fi + +cachepath="$1" +shift + +info() { + if [[ "$verbose" = 1 ]]; then + echo "$@" >&2 + fi +} + + +# Open the cache file read/write to an unused file descriptor. +exec {fd}<>"$cachepath" + +if flock -E77 --nonblock "$fd"; then + # We have an exclusive lock, so we are supposed to run the cmd; + # don't fail if it doesn't exit successfully. + + # Remove previously stored code. (Have to reopen, `>&"$fd"` does + # not truncate.) + true > "$cachepath" + + if "$@"; then + code=$? + else + code=$? + fi + echo "$code" >&"$fd" + exit "$code" +else + e=$? 
+ if [[ "$e" == 77 ]]; then + # Another instance is running the command already; wait it out + # then read out the exit code. + info -n "$0 $cachepath: waiting for concurrent run to finish..." + flock "$fd" + # `read` will fail if the file is empty (possible if the other + # $0-instance was killed) + if read -r code <&"$fd"; then + info "exited with code $code" + exit "$code" + else + info "other $0 instance was killed" + exit 130 + fi + else + echo "$0: got exit code $e, is the 'flock' command not available?" >&2 + false + fi +fi diff --git a/bin/run-with-log b/bin/run-with-log new file mode 100755 index 000000000..968cb3752 --- /dev/null +++ b/bin/run-with-log @@ -0,0 +1,43 @@ +#!/bin/bash +set -meuo pipefail +IFS= + +usage() { + echo "$0 logfile cmd [args...]" + echo " Renames logfile to logfile.old, then runs cmd with args," + echo " redirecting its stdout/stderr to logfile.tmp, when successful" + echo " renames logfile.tmp to logfile." + echo " If not successful, prints logfile.tmp to stdout and fails." + echo + echo " If the COLOR env variable is true, the cmd will not see the " + echo " pipe and hence will colorize as if running under a terminal." + echo " This requires the 'unbuffer' tool from the 'expect' package " + echo " to be installed." 
+ exit 1 +} + +if [[ $# -lt 2 ]]; then + usage +fi + +logfile="$1" +shift + +if [ -e "$logfile" ]; then + mv "$logfile" "$logfile".old +fi + +_run() { + if [[ "${COLOR-0}" = 0 ]]; then + "$@" + else + unbuffer "$@" + fi +} + +if _run "$@" > "$logfile".tmp 2>&1; then + mv "$logfile".tmp "$logfile" +else + P="$logfile".tmp perl -wne 's/^/$ENV{P}\t/; print' < "$logfile".tmp + false +fi diff --git a/bin/runapi-in b/bin/runapi-in new file mode 100755 index 000000000..1b8f91e10 --- /dev/null +++ b/bin/runapi-in @@ -0,0 +1,23 @@ +#!/bin/bash +set -meuo pipefail +IFS= + +usage() { + echo "$0 test-input-dir port-number" >&2 + false +} + +if [[ $# != 2 ]]; then + usage +fi +testdir="$1" +portnumber="$2" + +IFS=' ' + +set -x + +cd "$testdir" + +export SPDLOG_LEVEL=${SPDLOG_LEVEL-debug} +exec ${DEBUGGER-} ../../build/siloServer --api-port "$portnumber" diff --git a/bin/runtests-e2e b/bin/runtests-e2e new file mode 100755 index 000000000..78d49efdd --- /dev/null +++ b/bin/runtests-e2e @@ -0,0 +1,48 @@ +#!/bin/bash +set -meuo pipefail +IFS= + +#set -x + +usage() { + echo "$0 test-input-dir port-number" >&2 + false +} + +if [[ $# != 2 ]]; then + usage +fi +testdir="$1" +portnumber="$2" + + +runapi-in "$testdir" "$portnumber" & +apipid=$(jobs -p) + +cleanup () { + kill -9 "$apipid" || true +} +trap cleanup EXIT + +# Wait until the API is ready (at most 300 attempts, one second apart) +tries=300 +while true; do + if res=$(curl --silent --fail-early --data '{}' http://localhost:"$portnumber"/query); then + if echo " $res"| grep -q "Database not initialized yet"; then + true # continue + else + break + fi + fi + tries=$(( tries - 1 )) + if [[ $tries -lt 1 ]]; then + echo "Timeout waiting for the database to be ready." + exit 1 + fi + sleep 1 +done + +# Run the tests +SILO_URL=127.0.0.1:"$portnumber" node --test --test-reporter=tap + +# Bash afterwards kills the API process via `cleanup`. 
diff --git a/bin/test-query-files b/bin/test-query-files new file mode 100755 index 000000000..c053bb75e --- /dev/null +++ b/bin/test-query-files @@ -0,0 +1,7 @@ +#!/bin/bash +set -meuo pipefail +IFS= + +# query inputs +bin/verified-find endToEndTests/test/ -name "*.json" + diff --git a/bin/verified-find b/bin/verified-find new file mode 100755 index 000000000..d940e292b --- /dev/null +++ b/bin/verified-find @@ -0,0 +1,45 @@ +#!/bin/bash +set -meuo pipefail +IFS= + +usage() { + echo "usage: $0 dir [more find arguments ...]" + echo " Runs find with the given dir and arguments," + echo " but if it can run git ls-files on the same dir," + echo " verifies if both give the same output and warns" + echo " if not." + false +} + +if [[ $# -lt 1 ]]; then + usage +fi +if [[ "$1" = -h ]] || [[ "$1" = --help ]]; then + usage +fi + +tmp1=$(mktemp) +tmp2=$(mktemp) +tmp3=$(mktemp) + +find "$@" | LANG=C sort > "$tmp1" +cat "$tmp1" + +# oh, have to exclude the .js files in the endToEndTests/test/ +# case; there are TWO subdirectories with test files in +# testBaseData, but who knows if that changes in the future? +if git ls-files "$1" | grep -v '\.js$' | LANG=C sort > "$tmp2" 2>/dev/null; then + if [ -s "$tmp2" ]; then + if diff -u "$tmp1" "$tmp2" > "$tmp3"; then + true + else + { + echo "Warning: $0: '$1' yielded different results for find vs. git:" + cat "$tmp3" + } >&2 + # but continue running. + fi + fi +fi + +rm -f "$tmp1" "$tmp2" "$tmp3" diff --git a/doc/config.md b/doc/config.md new file mode 100644 index 000000000..a64afc637 --- /dev/null +++ b/doc/config.md @@ -0,0 +1,53 @@ +# How the configuration system works + +SILO takes configuration information from 3 configuration sources: +YAML files, environment variables, and command line arguments. The +same variables can be defined via any of them (but while the path to +the first-level configuration file can even be defined in the file +itself, only values passed by env variable or command line are useful, +of course). 
Environment variables override YAML file entries, and +command line arguments override both. + +The system works off metadata on the structs making up the +configuration data. + +The metadata is converted at runtime (via +[`ConfigStruct`](../include/config/config_specification.h)) to a flat +representation, a vector of tuples of +[`ConfigKeyPath`](../include/config/config_key_path.h) (list of key segment strings) and +reference to [`ConfigValue`](../include/config/config_specification.h) (the metadata on a +struct field). This vector is the basis to build the help text, +or to map to vectors or key/value representations for the source +in question. + +Each source ([command line arguments](XX), [environment variables](XX), +[yaml file](XX)) has its individual constructor and error handling +during construction. The resulting object must implement +[`VerifyConfigSource`](../include/config/config_specification.h), the `verify` method of +which takes the config values vector mentioned in the previous +paragraph, and returns an object that implements +[`VerifiedConfigSource`](../include/config/config_backend.h). This is then, inside +[`raw_get_config`](XX), passed to the +[`OverwriteFrom::overwrite_from`](XX?) method to +fill the fields of the to-be-configured struct with the values +destined for them. + +To make this work, each configurable struct needs to implement +[`OverwriteFrom`](XX?); additionally, the top-level +configurable struct needs to implement +[`ToplevelConfig`](XX). To provide that latter +implementation, the top-level config struct should have a boolean +help field, and a field to take a path to the config file that +should be read, if given. + +The process of going through the 3 sources, and reading the config +file that was specified by the user, is handled by the +aforementioned `raw_get_config` function. 
All this +function needs is a reference to the (remaining) command line +arguments to be parsed, and a reference to the struct metadata for +the toplevel configuration struct. It returns the filled-in +struct, of the given type parameter which must match the metadata +that was given. + +For more information (with quite some overlap with this description), +see [`config_source_interface`](../include/config/config_backend.h). diff --git a/endToEndTests/test/queries/fasta_allTestSequences.json b/endToEndTests/test/queries/fasta_allTestSequences.json deleted file mode 100644 index 17e807369..000000000 --- a/endToEndTests/test/queries/fasta_allTestSequences.json +++ /dev/null @@ -1,118 +0,0 @@ -{ - "testCaseName": "Get the unaligned fasta for all test sequences", - "query": { - "action": { - "type": "Fasta", - "sequenceName": "testSecondSequence", - "orderByFields": ["gisaid_epi_isl"] - }, - "filterExpression": { - "type": "True" - } - }, - "expectedQueryResult": [ - { "gisaid_epi_isl": "EPI_ISL_1001493", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1001920", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1002052", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1002156", "testSecondSequence": "ACGN" }, - { "gisaid_epi_isl": "EPI_ISL_1003010", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1003036", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1003373", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1003425", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1003519", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1003629", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1003849", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1004495", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1005148", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1036103", "testSecondSequence": "ACGT" }, - 
{ "gisaid_epi_isl": "EPI_ISL_1080536", "testSecondSequence": "ATGT" }, - { "gisaid_epi_isl": "EPI_ISL_1119315", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1119584", "testSecondSequence": "ACGN" }, - { "gisaid_epi_isl": "EPI_ISL_1129663", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1130868", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1131102", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1195052", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1260480", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1273458", "testSecondSequence": "ANGT" }, - { "gisaid_epi_isl": "EPI_ISL_1273715", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1360935", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1361468", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1407962", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1408062", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1408408", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1408805", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1597890", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1597932", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1599113", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1682849", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1747752", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1747885", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1748215", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1748243", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1748395", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1749892", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1749899", "testSecondSequence": "AAGN" }, - { "gisaid_epi_isl": 
"EPI_ISL_1749960", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1750503", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1750868", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1760534", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1840634", "testSecondSequence": "ACGN" }, - { "gisaid_epi_isl": "EPI_ISL_2016901", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2017036", "testSecondSequence": "ANGT" }, - { "gisaid_epi_isl": "EPI_ISL_2019235", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2019350", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2086867", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2180023", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2180995", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2181005", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2213804", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2213934", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2213984", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2214128", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2270139", "testSecondSequence": null }, - { "gisaid_epi_isl": "EPI_ISL_2307766", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2307888", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2308054", "testSecondSequence": null }, - { "gisaid_epi_isl": "EPI_ISL_2359636", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2360326", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2367431", "testSecondSequence": "NCGT" }, - { "gisaid_epi_isl": "EPI_ISL_2374969", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2375097", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2375165", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2375247", 
"testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2375490", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2379651", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2405276", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2408472", "testSecondSequence": "AAGT" }, - { "gisaid_epi_isl": "EPI_ISL_2544226", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2544332", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2544452", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2574088", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_3016465", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_3086369", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_3128737", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_3128796", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_3128811", "testSecondSequence": "ACGTACGT" }, - { "gisaid_epi_isl": "EPI_ISL_3247294", "testSecondSequence": null }, - { - "gisaid_epi_isl": "EPI_ISL_3259931", - "testSecondSequence": "JRZFHVKQIQGIVPUNJZCDKLOPDFTWZWXEXKZIHLGFWZNIGUAAPJBXPQCJBFUYHHIOPNDMTMHAFPHMZRCNUGIBRZCNKAJZMWXMBMPQRTZQUHTIFSOBXAQWMESDRWVJQWRE" - }, - { "gisaid_epi_isl": "EPI_ISL_3267832", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_3465556", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_3465732", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_3578231", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_466942", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_581968", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_721941", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_737604", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_737715", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_737860", "testSecondSequence": "ACGT" }, - { 
"gisaid_epi_isl": "EPI_ISL_768148", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_830864", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_899725", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_899762", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_931031", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_931279", "testSecondSequence": "ACGT" } - ] -} diff --git a/endToEndTests/test/queries/nOf_2of3_details.json b/endToEndTests/test/queries/nOf_2of3_details.json deleted file mode 100644 index 3e7bcd5cc..000000000 --- a/endToEndTests/test/queries/nOf_2of3_details.json +++ /dev/null @@ -1,83 +0,0 @@ -{ - "testCaseName": "N-Of query requesting 2 of 3 mutations with details action", - "query": { - "action": { - "type": "Details", - "randomize": { - "seed": 1232 - } - }, - "filterExpression": { - "type": "N-Of", - "numberOfMatchers": 2, - "matchExactly": false, - "children": [ - { - "type": "NucleotideEquals", - "position": 1, - "symbol": "-" - }, - { - "type": "NucleotideEquals", - "position": 2, - "symbol": "T" - }, - { - "type": "NucleotideEquals", - "position": 27542, - "symbol": "N" - } - ] - } - }, - "expectedQueryResult": [ - { - "age": 58, - "country": "Switzerland", - "date": "2021-04-28", - "division": "Basel-Stadt", - "gisaid_epi_isl": "EPI_ISL_2019235", - "pango_lineage": "B.1.1.7", - "qc_value": 0.9, - "region": "Europe", - "test_boolean_column": false, - "unsorted_date": "2021-01-22" - }, - { - "age": 50, - "country": "Switzerland", - "date": "2020-11-13", - "division": "Solothurn", - "gisaid_epi_isl": "EPI_ISL_1005148", - "pango_lineage": "B.1.221", - "qc_value": 0.92, - "region": "Europe", - "test_boolean_column": null, - "unsorted_date": "2020-12-17" - }, - { - "age": 50, - "country": "Switzerland", - "date": "2021-02-23", - "division": "Solothurn", - "gisaid_epi_isl": "EPI_ISL_1195052", - "pango_lineage": "B.1.1.7", - "qc_value": 0.95, - "region": "Europe", - 
"test_boolean_column": null, - "unsorted_date": "2021-07-04" - }, - { - "age": 54, - "country": "Switzerland", - "date": "2021-03-19", - "division": "Solothurn", - "gisaid_epi_isl": "EPI_ISL_1597932", - "pango_lineage": "B.1.1.7", - "qc_value": 0.94, - "region": "Europe", - "test_boolean_column": true, - "unsorted_date": "2021-02-10" - } - ] -} diff --git a/include/config/backend/command_line_arguments.h b/include/config/backend/command_line_arguments.h new file mode 100644 index 000000000..3a0dc8727 --- /dev/null +++ b/include/config/backend/command_line_arguments.h @@ -0,0 +1,34 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +namespace silo::config { + +class CommandLineArguments : public ConfigBackend { + std::vector args; + + public: + explicit CommandLineArguments(std::span args_) + : args(args_.begin(), args_.end()) {} + + [[nodiscard]] constexpr std::string_view errorContext() const { + return "command line arguments"; + }; + + [[nodiscard]] VerifiedConfigSource verify(const ConfigSpecification& config_specification + ) const override; + + static std::string configKeyPathToString(const ConfigKeyPath& key_path); + + // TODO maybe rename because return type changed? + static AmbiguousConfigKeyPath stringToConfigKeyPath(const std::string& key_path_string); +}; + +} // namespace silo::config diff --git a/include/config/backend/environment_variables.h b/include/config/backend/environment_variables.h new file mode 100644 index 000000000..6055d27d0 --- /dev/null +++ b/include/config/backend/environment_variables.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include +#include + +#include "config/config_backend.h" +#include "silo/config/util/config_exception.h" + +// POSIX declares `environ` as `char**` with C linkage; this declaration must match it exactly. +extern "C" char** environ; + +namespace silo::config { + +// (Inheriting implementation for ConfigSource directly.) 
+class EnvironmentVariables : public ConfigBackend { + /* EnvironmentVariables base, */ + std::vector> alist; + + explicit EnvironmentVariables(std::vector>&& alist_) + : alist(std::move(alist_)){}; + + explicit EnvironmentVariables(){}; + + public: + [[nodiscard]] VerifiedConfigSource verify(const ConfigSpecification& config_specification + ) const override; + + static EnvironmentVariables decodeEnvironmentVariables(const char* const* envp = environ); + + [[nodiscard]] constexpr std::string_view errorContext() const { + return "environment variables"; + }; + + static std::string configKeyPathToString(const ConfigKeyPath& key_path); + + // TODO maybe rename because return type changed? + static AmbiguousConfigKeyPath stringToConfigKeyPath(const std::string& key_path_string); +}; + +} // namespace silo::config diff --git a/include/config/backend/yaml_file.h b/include/config/backend/yaml_file.h new file mode 100644 index 000000000..c0c0f8f92 --- /dev/null +++ b/include/config/backend/yaml_file.h @@ -0,0 +1,38 @@ +#pragma once + +#include +#include + +#include + +#include "config/config_backend.h" + +namespace silo::config { + +// Todo rename to YamlConfig +class YamlFile : public ConfigBackend { + std::string error_context; + std::unordered_map yaml_fields; + + YamlFile(std::string error_context, std::unordered_map yaml_fields) + : error_context(std::move(error_context)), + yaml_fields(std::move(yaml_fields)) {} + + std::string errorContext() const; + + public: + [[nodiscard]] VerifiedConfigSource verify(const ConfigSpecification& config_specification + ) const override; + + const std::unordered_map& getYamlFields() const; + + static YamlFile readFile(const std::filesystem::path& path); + + static YamlFile fromYAML(const std::string& error_context, const std::string& yaml_string); + + static std::string configKeyPathToString(const ConfigKeyPath& key_path); + + static ConfigKeyPath stringToConfigKeyPath(const std::string& key_path_string); +}; + +} // namespace 
silo::config diff --git a/include/config/config_backend.h b/include/config/config_backend.h new file mode 100644 index 000000000..7490dd05d --- /dev/null +++ b/include/config/config_backend.h @@ -0,0 +1,97 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include "config/config_specification.h" +#include "config/verified_config_source.h" + +// TODO adapt doc +//! Interfaces for configuration access. +//! +//! The goals are: +//! +//! * allow for configuration files (e.g. YAML), environment +//! variables, and command line options, and have them shadow +//! (override) each other (in this order). +//! * report any I/O errors (e.g. when reading YAML files or +//! decoding unicode), obviously. +//! * report unknown configuration keys. +//! * report invalid configuration value formats. +//! +//! To achieve these goals, each of those configuration sources (YAML +//! or perhaps other kinds of files, env vars, command line arguments) +//! needs readers that implement the interfaces here. Each reader goes +//! through these steps: +//! +//! 0. Optionally have a parameterless type that only implements +//! option key path formatting. +//! +//! 1. Have a first stage object that contains the result of reading +//! the actual source (e.g. file) and reporting I/O errors; +//! there's no interface for this since this is specific to each +//! source. +//! +//! 2. The VerifyConfigSource::verify function that this object +//! implements checks that all found keys are OK, and returns a +//! VerifiedConfigSource object. +//! +//! 3. To fill in a to be configured struct, the +//! `VerifiedConfigSource` object is queried for each field key +//! and the struct field is set via OverwriteFrom::overwrite_from +//! implemented on the struct in question. `VerifiedConfigSource` +//! is also implemented for `ConfigStruct`, that way +//! overwrite_from can also be used identically to initialize a +//! struct with the default values. 
(Note: the configuration +//! structs need to implement `Default`, too, so that they can be +//! created first for the following chain of side effects; but +//! this just sets them to the defaults for each contained data +//! type via derive. Be careful not to forget to overwrite from +//! the ConfigStruct!) +//! +//! To reiterate, step 3 is applied to a particular struct first for +//! the defaults and then for all config sources in order to achieve +//! the shadowing effect (via `overwrite_from`). +//! +//! The information about valid keys as well as optional default +//! values for them is declared via +//! `super::config_metadata::ConfigStruct` values. They contain only a +//! single-string key for each field (representing one config key path +//! segment), and the full path for each field is constructed from +//! the nesting of the ConfigStruct instances (a tree). +//! + +namespace silo::config { + +/// Config keys (represented via the type `ConfigKeyPath`) are lists +/// of strings in camel case, and used as such in yaml config +/// files. For command line arguments those are translated to kebab +/// case (lower-case joined '-' before uppercase characters), for +/// environment variables to uppercase with underscores and prefixed +/// with "SILO_". Multi-segment paths are treated as nested +/// dictionaries in yaml config files, joined with '-' for command +/// line arguments and '_' for environment variables. `ConfigSource` +/// provides the means to do this type-specific conversion + +/// A ConfigSource is providing I/O-error free access to a set of +/// unverified configuration data. +class ConfigBackend { + public: + /// Virtual so that a backend can be destroyed safely through a `ConfigBackend` pointer. + virtual ~ConfigBackend() = default; + + /// Verify that all user-presented *keys* in `self` are + /// valid. (Correctness check of the *values* only happens later + /// via `get`.) 
Throws [silo::config::ConfigException] on + /// verification errors (you could subclass those as + /// InvalidConfigKeyError, ParseError). May consume/move `this`. 
+ [[nodiscard]] virtual VerifiedConfigSource verify(const ConfigSpecification& config_specification + ) const = 0; +}; + +} // namespace silo::config diff --git a/include/config/config_interface.h b/include/config/config_interface.h new file mode 100644 index 000000000..ec302f7a9 --- /dev/null +++ b/include/config/config_interface.h @@ -0,0 +1,103 @@ +#pragma once + +#include +#include +#include +#include + +#include "config/backend/command_line_arguments.h" +#include "config/backend/environment_variables.h" +#include "config/backend/yaml_file.h" +#include "config/config_backend.h" +#include "config/config_specification.h" +#include "silo/common/cons_list.h" +#include "silo/common/overloaded.h" +#include "silo/config/util/config_exception.h" + +namespace silo::config { + +/// For config structs (containing help and possibly config file paths): +/// We use a concept instead of virtual method overrides. +/// This is because we want to call the virtual method overwriteFrom for the default values when +/// constructing a Config. Instead, making the constructor private and instead creating a factory +/// method would also be possible. Here, a concept works great, because the only usage of the +/// interface uses a template anyways, due to the different return types (RuntimeConfig vs. +/// PreprocessingConfig), whose easily accessible structure should remain. +template +concept Config = requires(C c, const C cc, const VerifiedConfigSource& config_source) { + /// The specification which + { C::getConfigSpecification() } -> std::same_as; + + /// Whether the user gave the --help option or environment + /// variable equivalent. + /// bool asksForHelp() const = 0; + { cc.asksForHelp() } -> std::same_as; + + /// Optional config file that the user gave (or that is provided + /// by the type via its defaults) that should be loaded. 
+ /// std::optional configPath() const = 0; + { cc.configPath() } -> std::same_as>; + + /// Overwrite the fields of an instance of the target type; done + /// that way so that multiple kinds of config sources can shadow + /// each other's values by application in sequence. `parents` is + /// the upwards path to the root of the struct tree (use + /// .to_vec_reverse() and wrap in ConfigKeyPath). Throws + /// `silo::config::ConfigException` for config value parse errors + /// (subclass as ConfigValueParseError?). + /// void overwriteFrom(const VerifiedConfigSource& config_source) = 0; + { c.overwriteFrom(config_source) } -> std::same_as; + + /// Validation / Sanity checks about the values of this config + /// void validate() = 0; + { c.validate() } -> std::same_as; +}; + +/// In case of error, returns the exit code that the caller should +/// pass to exit(): 0 if the user gave --help, 1 in case of erroneous +/// usage (the error is already printed in that case). +template +std::variant getConfig(std::span cmd) { + const auto config_specification = C::getConfigSpecification(); + try { + auto env_source = + EnvironmentVariables::decodeEnvironmentVariables().verify(config_specification); + auto cmd_source = CommandLineArguments{cmd}.verify(config_specification); + + C config; + + // First, only check command line arguments, for "--help"; avoid + // potential errors from env processing, and we don't have the + // path to the config file yet. + config = {}; + config.overwriteFrom(cmd_source); + if (config.asksForHelp()) { + std::cout << config_specification.helpText() << "\n" << std::flush; + return 0; + } + config.overwriteFrom(env_source); + + // Was a config file given as an argument or by environment variable? 
+ auto config_path = config.configPath(); + if (config_path.has_value()) { + auto file_source = YamlFile::readFile(*config_path).verify(config_specification); + // Now read again with the file first: + config = {}; + config.overwriteFrom(file_source); + config.overwriteFrom(env_source); + config.overwriteFrom(cmd_source); + // (The config file might specify --help, too, but we ignore + // that.) + } + config.validate(); + return std::move(config); + } catch (const silo::config::ConfigException& e) { + std::cerr << fmt::format( + "Usage error: {}.\n\nRun with the --help option for help.\n", e.what() + ) + << std::flush; + return 1; + } +} + +} // namespace silo::config diff --git a/include/config/config_key_path.h b/include/config/config_key_path.h new file mode 100644 index 000000000..2169a414f --- /dev/null +++ b/include/config/config_key_path.h @@ -0,0 +1,66 @@ +#pragma once + +#include +#include + +#include +#include + +namespace silo::config { + +/// Internal representation of config keys. +/// List of lists of _non-empty lower-case alphanumeric_ strings +/// By example the YAML field: +/// `query.materializationCutoff` will be represented as +/// ["query",["materialization","cutoff"]] +/// This is easy to handle internally and also easy for transformation +/// into CLI argument string and environment variable string +class ConfigKeyPath { + ConfigKeyPath(std::vector> path) + : path(path) {} + + public: + std::vector> path; // TODO make private and get()? + + ConfigKeyPath() = default; + + static ConfigKeyPath from(std::vector> paths); + + friend bool operator==(const ConfigKeyPath& lhs, const ConfigKeyPath& rhs) { + return lhs.path == rhs.path; + } + + [[nodiscard]] std::string toDebugString() const; +}; + +/// Like ConfigKeyPath, but it is impossible to decide whether the input value +/// meant to refer to api.port or apiPort. 
This is the case for CLI arguments (--api-port) +/// and Environment Variables (SILO_API_PORT) +class AmbiguousConfigKeyPath { + public: + std::vector path; + + static AmbiguousConfigKeyPath from(const ConfigKeyPath& key_path); + + friend bool operator==(const AmbiguousConfigKeyPath& lhs, const AmbiguousConfigKeyPath& rhs) { + return lhs.path == rhs.path; + } +}; + +} // namespace silo::config + +template <> +struct [[maybe_unused]] fmt::formatter : fmt::formatter { + [[maybe_unused]] static auto format(const silo::config::ConfigKeyPath& val, format_context& ctx) + -> decltype(ctx.out()) { + return fmt::format_to(ctx.out(), "{}", val.toDebugString()); + } +}; + +// So that we are able to use std::unordered_map of our internal representation of config keys +namespace std { +template <> +struct hash { + std::size_t operator()(const silo::config::ConfigKeyPath& key) const; +}; +} // namespace std diff --git a/include/config/config_specification.h b/include/config/config_specification.h new file mode 100644 index 000000000..574cebdf4 --- /dev/null +++ b/include/config/config_specification.h @@ -0,0 +1,42 @@ +#pragma once + +//! Structs with which to declare metainformation on structs that are +//! to hold configuration data. + +#include +#include +#include + +#include "config/config_key_path.h" +#include "config/config_value.h" +#include "config/verified_config_source.h" +#include "silo/common/cons_list.h" + +namespace silo::config { + +/// Does not support extracting non-option arguments; those wouldn't +/// be supported by env vars or config files anyway, although could +/// still be specified for command line, but that's not implemented +/// currently. +class ConfigSpecification { + public: + /// The name of the program for which this config is used. 
This will be printed in the help text + std::string_view program_name; + // Using std::vector so that initialization in place is possible; + // std::span would require the array to exist in a different global + // first, don't want to make it verbose like that. Paying with + // dropping constexpr for that. + std::vector fields; + + std::string helpText() const; + + std::optional getValueSpecification(const ConfigKeyPath& key) const; + + std::optional getValueSpecificationFromAmbiguousKey( + const AmbiguousConfigKeyPath& key + ) const; + + VerifiedConfigSource getConfigSourceFromDefaults() const; +}; + +} // namespace silo::config diff --git a/include/config/config_value.h b/include/config/config_value.h new file mode 100644 index 000000000..09b3da558 --- /dev/null +++ b/include/config/config_value.h @@ -0,0 +1,113 @@ +#pragma once + +//! Part of config metadata, but can't be in `config_metadata.h` due to +//! that depending on `config/config_source_interface.h` which also +//! references `ConfigValue`. 
+ +#include +#include +#include +#include +#include + +#include "config/config_key_path.h" + +namespace silo::config { + +enum class ConfigValueType { STRING, PATH, INT32, UINT32, UINT16, BOOL }; + +constexpr std::string_view configValueTypeToString(ConfigValueType type) { + switch (type) { + case ConfigValueType::STRING: + return "string"; + case ConfigValueType::PATH: + return "path"; + case ConfigValueType::INT32: + return "i32"; + case ConfigValueType::UINT32: + return "u32"; + case ConfigValueType::UINT16: + return "u16"; + case ConfigValueType::BOOL: + return "bool"; + } +} + +// Forward declaration for friend class access +class ConfigValueSpecification; + +class ConfigValue { + friend class ConfigValueSpecification; + + ConfigValue( + std::variant value + ) + : value(value) {} + + public: + std::variant value; + + static ConfigValue fromString(const std::string& value) { return ConfigValue{value}; } + + static ConfigValue fromPath(const std::filesystem::path& value) { return ConfigValue{value}; } + + static ConfigValue fromInt32(int32_t value) { return ConfigValue{value}; } + + static ConfigValue fromUint32(uint32_t value) { return ConfigValue{value}; } + + static ConfigValue fromUint16(uint16_t value) { return ConfigValue{value}; } + + static ConfigValue fromBool(bool value) { return ConfigValue{value}; } + + ConfigValueType getValueType() const; + + std::string toString() const; +}; + +class ConfigValueSpecification { + ConfigValueSpecification() = default; + + public: + ConfigKeyPath key; + ConfigValueType type; + std::optional default_value; + /// Help as shown for --help, excluding the other info above. 
+ /// If type is bool, the command line option does not take an argument but + /// is the constant "true", which will be added to the help text + std::string_view help_text; + + ConfigValue getValueFromString(std::string value_string) const; + + ConfigValue createValue( + std::variant value + ) const; + + static ConfigValueSpecification createWithoutDefault( + ConfigKeyPath key, + ConfigValueType value_type, + std::string_view help_text + ) { + ConfigValueSpecification value_specification; + value_specification.key = key; + value_specification.type = value_type; + value_specification.help_text = help_text; + return value_specification; + } + + /// No need for the value_type. It is implicitly defined by the default. Prevents + /// misspecification. + static ConfigValueSpecification createWithDefault( + ConfigKeyPath key, + ConfigValue default_value, + std::string_view help_text + ) { + ConfigValueSpecification value_specification; + value_specification.key = key; + value_specification.type = default_value.getValueType(); + value_specification.default_value = default_value; + value_specification.help_text = help_text; + return value_specification; + } +}; + +} // namespace silo::config diff --git a/include/config/verified_config_source.h b/include/config/verified_config_source.h new file mode 100644 index 000000000..745d9e35e --- /dev/null +++ b/include/config/verified_config_source.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include +#include + +#include "config/config_key_path.h" +#include "config/config_value.h" + +namespace silo::config { + +/// A VerifiedConfigSource is providing I/O- and key error free (but +/// not necessarily value-error free) access to a set of configuration +/// data. +class VerifiedConfigSource { + public: + std::unordered_map config_values; + + /// Retrieve a config value for the given key as a string + /// (potentially converting other value types). 
(Explicitly + /// getting as a string is necessary for YAML, where the YAML + /// parser already has some typed representations but not + /// necessarily those we need. (Todo: this is a hack, improve.)) + /// This returns an option since even though invalid options are + /// not present in self, the given option may also not be present. + [[nodiscard]] std::optional getString(const ConfigKeyPath& config_key_path) const; + + [[nodiscard]] std::optional getPath(const ConfigKeyPath& config_key_path + ) const; + + [[nodiscard]] std::optional getInt32(const ConfigKeyPath& config_key_path) const; + + [[nodiscard]] std::optional getUint32(const ConfigKeyPath& config_key_path) const; + + [[nodiscard]] std::optional getUint16(const ConfigKeyPath& config_key_path) const; + + [[nodiscard]] std::optional getFloat(const ConfigKeyPath& config_key_path) const; + + [[nodiscard]] std::optional getBool(const ConfigKeyPath& config_key_path) const; +}; + +} // namespace silo::config diff --git a/include/silo/common/alist.h b/include/silo/common/alist.h new file mode 100644 index 000000000..f747107cb --- /dev/null +++ b/include/silo/common/alist.h @@ -0,0 +1,26 @@ +#include +#include +#include + +template +class AList { + public: + // Constructor that takes a reference to a vector of key-value pairs + explicit AList(const std::vector>& data) + : data(data) {} + + // Find the value associated with a given key; returns NULL if key + // is not found. + const V* get(const K& key) const { + auto iter = std::find_if(data.begin(), data.end(), [&key](const auto& pair) { + return pair.first == key; + }); + if (iter != data.end()) { + return &iter->second; + } + return NULL; + } + + private: + const std::vector>& data; +}; diff --git a/include/silo/common/cons_list.h b/include/silo/common/cons_list.h new file mode 100644 index 000000000..0ccb43a03 --- /dev/null +++ b/include/silo/common/cons_list.h @@ -0,0 +1,68 @@ +#pragma once + +//! Cons list. 
The tail of the list is stored as a normal reference +//! (no reference counting). This is meant to be used with recursive +//! algorithms to maintain a path back up. + +#include +#include +#include + +template +class ConsList { + std::optional>>> inner; + + public: + explicit ConsList() + : inner(std::nullopt) {} + explicit ConsList(std::optional>>> inner_) + : inner(inner_) {} + + ConsList cons(T val) const { + std::pair&> pair{val, *this}; + return ConsList(std::optional{pair}); + } + + [[nodiscard]] bool isEmpty() const { return !inner.has_value(); } + + std::optional> first() const { + if (isEmpty()) { + return std::nullopt; + } + const std::pair>>& pair = inner.value(); + return std::optional>(std::get<0>(pair)); + } + + std::optional>> rest() const { + if (isEmpty()) { + return std::nullopt; + } + const std::pair>>& pair = inner.value(); + return std::optional>>(std::get<1>(pair)); + } + + /* ... */ + + // template + std::vector toVec() const { + std::vector values{}; + std::reference_wrapper> current = std::cref(*this); + + while (!current.get().isEmpty()) { + values.push_back(current.get().first().value()); + current = current.get().rest().value(); + } + return values; + } + + // template + std::vector toVecReverse() const { + // There's no faster way than reverse (except perhaps + // recursion, but that is dicey, or getting the list first + // then set Vec slots via index, but that needs Default and + // writes to memory twice, too), right? 
+ std::vector values = toVec(); + std::reverse(values.begin(), values.end()); + return values; + } +}; diff --git a/include/silo/common/fmt_formatters.h b/include/silo/common/fmt_formatters.h new file mode 100644 index 000000000..49ee65f16 --- /dev/null +++ b/include/silo/common/fmt_formatters.h @@ -0,0 +1,92 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include + +#include "silo/common/panic.h" + +template +struct [[maybe_unused]] fmt::formatter> : fmt::formatter { + [[maybe_unused]] static auto format(const std::optional& val, format_context& ctx) + -> decltype(ctx.out()) { + if (val.has_value()) { + return fmt::format_to(ctx.out(), "'{}'", val.value()); + } + return fmt::format_to(ctx.out(), "null"); + } +}; + +template +struct [[maybe_unused]] fmt::formatter> : fmt::formatter { + [[maybe_unused]] static auto format(const std::unordered_map& val, format_context& ctx) + -> decltype(ctx.out()) { + auto out = ctx.out(); + fmt::format_to(out, "{{\n"); + for (const auto& [key, value] : val) { + fmt::format_to(out, " {}: {},\n", key, value); + } + fmt::format_to(out, "}}"); + return out; + } +}; + +template <> +struct [[maybe_unused]] fmt::formatter : fmt::formatter { + [[maybe_unused]] static auto format(const std::filesystem::path& val, format_context& ctx) + -> decltype(ctx.out()) { + return fmt::format_to(ctx.out(), "{}", val.string()); + } +}; + +namespace silo::common { + +std::string toIsoString( + const std::chrono::time_point& time_point +); +} // namespace silo::common + +template <> +struct [[maybe_unused]] fmt::formatter< + std::chrono::time_point> + : fmt::formatter { + [[maybe_unused]] static auto format( + const std::chrono::time_point& val, + format_context& ctx + ) -> decltype(ctx.out()) { + return fmt::format_to(ctx.out(), "{}", silo::common::toIsoString(val)); + } +}; + +namespace fmt { + +template <> +// TODO issue for moving this? 
+struct formatter { + constexpr auto parse(format_parse_context& ctx) -> decltype(ctx.begin()) { return ctx.end(); } + + template + auto format(const nlohmann::json& json, FormatContext& ctx) -> decltype(ctx.out()) { + return fmt::format_to(ctx.out(), "{}", json.dump()); + } +}; + +template <> +// TODO issue for moving this? +struct formatter { + constexpr auto parse(format_parse_context& ctx) -> decltype(ctx.begin()) { return ctx.end(); } + + template + auto format(const YAML::Node& yaml, FormatContext& ctx) -> decltype(ctx.out()) { + YAML::Emitter out; + out << yaml; + SILO_ASSERT(out.good()); + return fmt::format_to(ctx.out(), "{}", out.c_str()); + } +}; + +} // namespace fmt diff --git a/include/silo/common/overloaded.h b/include/silo/common/overloaded.h new file mode 100644 index 000000000..90294b3c1 --- /dev/null +++ b/include/silo/common/overloaded.h @@ -0,0 +1,12 @@ +#pragma once + +// From https://en.cppreference.com/w/cpp/utility/variant/visit +// helper type for the visitor #4 +template +struct overloaded : Ts... { + using Ts::operator()...; +}; + +// explicit deduction guide (not needed as of C++20) +template +overloaded(Ts...) -> overloaded; diff --git a/include/silo/common/type_name.h b/include/silo/common/type_name.h new file mode 100644 index 000000000..31f66fcf5 --- /dev/null +++ b/include/silo/common/type_name.h @@ -0,0 +1,16 @@ +#pragma once + +//! A function to get the name of a type (since `typeid(T).name()` +//! doesn't give anything useful). 
+ +#include +#include +#include + +namespace silo::common { + +// Template to get the type name +template +std::string typeName(); + +} // namespace silo::common diff --git a/include/silo/config/config_defaults.h b/include/silo/config/config_defaults.h new file mode 100644 index 000000000..05c1e8a3e --- /dev/null +++ b/include/silo/config/config_defaults.h @@ -0,0 +1,9 @@ +#pragma once + +#include + +namespace silo::config { + +static const std::filesystem::path DEFAULT_OUTPUT_DIRECTORY = "./output/"; + +} diff --git a/include/silo/config/preprocessing_config.h b/include/silo/config/preprocessing_config.h index 7136a251c..442385049 100644 --- a/include/silo/config/preprocessing_config.h +++ b/include/silo/config/preprocessing_config.h @@ -7,59 +7,54 @@ #include #include +#include #include -#include "silo/config/util/abstract_config_source.h" +#include "config/backend/yaml_file.h" +#include "config/config_interface.h" +#include "silo/config/config_defaults.h" namespace silo::config { -const AbstractConfigSource::Option INPUT_DIRECTORY_OPTION{{"inputDirectory"}}; -const AbstractConfigSource::Option OUTPUT_DIRECTORY_OPTION = {{"outputDirectory"}}; -const AbstractConfigSource::Option INTERMEDIATE_RESULTS_DIRECTORY_OPTION = { - {"intermediateResultsDirectory"} -}; -const AbstractConfigSource::Option PREPROCESSING_DATABASE_LOCATION_OPTION = { - {"preprocessingDatabaseLocation"} -}; -const AbstractConfigSource::Option DUCKDB_MEMORY_LIMIT_OPTION = {{"duckdbMemoryLimitInG"}}; -const AbstractConfigSource::Option LINEAGE_DEFINITIONS_FILENAME_OPTION = { - {"lineageDefinitionsFilename"} -}; -const AbstractConfigSource::Option NDJSON_INPUT_FILENAME_OPTION = {{"ndjsonInputFilename"}}; -const AbstractConfigSource::Option REFERENCE_GENOME_FILENAME_OPTION = {{"referenceGenomeFilename"}}; - -const std::string DEFAULT_OUTPUT_DIRECTORY = "./output/"; - class PreprocessingConfig { friend class fmt::formatter; public: - std::filesystem::path input_directory = "./"; - 
std::filesystem::path output_directory = DEFAULT_OUTPUT_DIRECTORY; - std::filesystem::path intermediate_results_directory = "./temp/"; + bool help; + std::filesystem::path input_directory = "./"; // TODO + std::filesystem::path output_directory = DEFAULT_OUTPUT_DIRECTORY; // TODO + std::filesystem::path intermediate_results_directory = std::filesystem::path{"./temp/"}; // TODO std::optional preprocessing_database_location; - std::optional duckdb_memory_limit_in_g; + std::optional duckdb_memory_limit_in_g = 10; // TODO remove std::optional lineage_definitions_file; std::optional ndjson_input_filename; - std::filesystem::path reference_genome_file = "reference_genomes.json"; + std::filesystem::path database_config_file = "database_config.yaml"; // TODO + std::filesystem::path reference_genome_file = + "reference_genomes.json"; // TODO remove dup default + std::optional preprocessing_config; - void validate() const; + /// Create PreprocessingConfig with all default values from the specification + PreprocessingConfig(); - [[nodiscard]] std::filesystem::path getOutputDirectory() const; + static ConfigSpecification getConfigSpecification(); - [[nodiscard]] std::filesystem::path getIntermediateResultsDirectory() const; + void validate() const; + + [[nodiscard]] std::filesystem::path getDatabaseConfigFilename() const; [[nodiscard]] std::optional getLineageDefinitionsFilename() const; [[nodiscard]] std::filesystem::path getReferenceGenomeFilename() const; - [[nodiscard]] std::optional getPreprocessingDatabaseLocation() const; + [[nodiscard]] std::optional getNdjsonInputFilename() const; - [[nodiscard]] std::optional getDuckdbMemoryLimitInG() const; + [[nodiscard]] uint32_t getDuckdbMemoryLimitInG() const; - [[nodiscard]] std::optional getNdjsonInputFilename() const; + [[nodiscard]] bool asksForHelp() const; + + void overwriteFrom(const silo::config::VerifiedConfigSource& config_source); - void overwrite(const silo::config::AbstractConfigSource& config_reader); + 
[[nodiscard]] std::optional configPath() const; }; } // namespace silo::config diff --git a/include/silo/config/runtime_config.h b/include/silo/config/runtime_config.h index 5e04d4e0d..d19dc5b87 100644 --- a/include/silo/config/runtime_config.h +++ b/include/silo/config/runtime_config.h @@ -3,21 +3,17 @@ #include #include -#include "silo/config/preprocessing_config.h" -#include "silo/config/util/abstract_config_source.h" +#include -namespace silo::config { +#include "config/config_backend.h" +#include "config/config_interface.h" +#include "config/config_specification.h" +#include "silo/config/config_defaults.h" -const AbstractConfigSource::Option DATA_DIRECTORY_OPTION{{"dataDirectory"}}; -const AbstractConfigSource::Option MAX_CONNECTIONS_OPTION{{"maxQueuedHttpConnections"}}; -const AbstractConfigSource::Option PARALLEL_THREADS_OPTION{{"threadsForHttpConnections"}}; -const AbstractConfigSource::Option PORT_OPTION{{"port"}}; -const AbstractConfigSource::Option ESTIMATED_STARTUP_TIME_IN_MINUTES_OPTION{ - {"estimatedStartupTimeInMinutes"} -}; +namespace silo::config { struct ApiOptions { - std::filesystem::path data_directory = silo::config::DEFAULT_OUTPUT_DIRECTORY; + // XXX remove defaults, now in structs int32_t max_connections = 64; int32_t parallel_threads = 4; uint16_t port = 8081; @@ -25,10 +21,36 @@ struct ApiOptions { estimated_startup_end; }; +struct QueryOptions { + size_t materialization_cutoff = 10000; +}; + struct RuntimeConfig { + bool help; + std::optional runtime_config; + std::filesystem::path data_directory = silo::config::DEFAULT_OUTPUT_DIRECTORY; ApiOptions api_options; + QueryOptions query_options; - void overwrite(const silo::config::AbstractConfigSource& config); + RuntimeConfig(); + + static ConfigSpecification getConfigSpecification(); + + void validate() const {}; + + [[nodiscard]] bool asksForHelp() const; + + [[nodiscard]] std::optional configPath() const; + + void overwriteFrom(const VerifiedConfigSource& config_source); }; } // namespace 
silo::config + +template <> +struct [[maybe_unused]] fmt::formatter : fmt::formatter { + [[maybe_unused]] static auto format( + const silo::config::RuntimeConfig& runtime_config, + format_context& ctx + ) -> decltype(ctx.out()); +}; diff --git a/include/silo/config/util/abstract_config_source.h b/include/silo/config/util/abstract_config_source.h deleted file mode 100644 index 15945db48..000000000 --- a/include/silo/config/util/abstract_config_source.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -namespace silo::config { - -class AbstractConfigSource { - public: - class Option { - public: - std::vector access_path; - - [[nodiscard]] std::string toString() const; - [[nodiscard]] std::string toCamelCase() const; - }; - - [[nodiscard]] virtual std::string configType() const = 0; - - [[nodiscard]] virtual bool hasProperty(const Option& option) const = 0; - [[nodiscard]] virtual std::optional getString(const Option& option) const = 0; - [[nodiscard]] virtual std::optional getInt32(const Option& option) const; - [[nodiscard]] virtual std::optional getUInt32(const Option& option) const; -}; - -} // namespace silo::config diff --git a/include/silo/config/util/yaml_file.h b/include/silo/config/util/yaml_file.h deleted file mode 100644 index 675077c44..000000000 --- a/include/silo/config/util/yaml_file.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include - -#include - -#include "abstract_config_source.h" - -namespace silo::config { - -class YamlFile : public silo::config::AbstractConfigSource { - std::filesystem::path filename; - YAML::Node node; - - public: - explicit YamlFile(const std::filesystem::path& filename); - - std::string configType() const override; - - bool hasProperty(const Option& option) const override; - - std::optional getString(const Option& option) const override; -}; - -} // namespace silo::config diff --git a/include/silo/test/query_fixture.test.h b/include/silo/test/query_fixture.test.h index 
c18d1e660..d238ef787 100644 --- a/include/silo/test/query_fixture.test.h +++ b/include/silo/test/query_fixture.test.h @@ -8,10 +8,10 @@ #include #include +#include "silo/common/fmt_formatters.h" #include "silo/common/lineage_tree.h" #include "silo/config/database_config.h" #include "silo/config/preprocessing_config.h" -#include "silo/config/util/yaml_file.h" #include "silo/database.h" #include "silo/database_info.h" #include "silo/preprocessing/preprocessor.h" @@ -71,7 +71,7 @@ namespace silo::test { ASSERT_EQ(actual, scenario.expected_query_result); \ } \ } \ - } // namespace \ + } // namespace struct QueryTestData { const std::vector ndjson_input_data; @@ -99,11 +99,10 @@ class QueryTestFixture : public ::testing::TestWithParam { std::filesystem::path input_directory = fmt::format("test{}", millis); std::filesystem::create_directories(input_directory); - config::PreprocessingConfig config_with_input_dir{ - .input_directory = input_directory, - .intermediate_results_directory = input_directory, - .ndjson_input_filename = "input.json" - }; + config::PreprocessingConfig config_with_input_dir; + config_with_input_dir.input_directory = input_directory; + config_with_input_dir.intermediate_results_directory = input_directory; + config_with_input_dir.ndjson_input_filename = "input.json"; config_with_input_dir.validate(); DataContainer::input_directory = input_directory; @@ -117,7 +116,7 @@ class QueryTestFixture : public ::testing::TestWithParam { std::cerr << "Could not open file for writing" << std::endl; return; } - for (const auto json : test_data.ndjson_input_data) { + for (const auto& json : test_data.ndjson_input_data) { file << json.dump() << std::endl; } file.close(); diff --git a/include/silo_api/api.h b/include/silo_api/api.h new file mode 100644 index 000000000..e97f0f9cd --- /dev/null +++ b/include/silo_api/api.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +#include "silo/config/runtime_config.h" + +class SiloServer : public Poco::Util::ServerApplication { 
+ public: + int runApi(const silo::config::RuntimeConfig& runtime_config); +}; \ No newline at end of file diff --git a/include/silo_api/command_line_arguments.h b/include/silo_api/command_line_arguments.h deleted file mode 100644 index e8668d20b..000000000 --- a/include/silo_api/command_line_arguments.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include - -#include "silo/config/util/abstract_config_source.h" - -namespace silo_api { - -class CommandLineArguments : public silo::config::AbstractConfigSource { - const Poco::Util::AbstractConfiguration& config; - - public: - static std::string asUnixOptionString(const Option& option); - - explicit CommandLineArguments(const Poco::Util::AbstractConfiguration& config); - - [[nodiscard]] std::string configType() const override; - - [[nodiscard]] bool hasProperty(const Option& option) const override; - - [[nodiscard]] std::optional getString(const Option& option) const override; -}; - -} // namespace silo_api diff --git a/include/silo_api/environment_variables.h b/include/silo_api/environment_variables.h deleted file mode 100644 index 090493b39..000000000 --- a/include/silo_api/environment_variables.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include - -#include "silo/config/util/abstract_config_source.h" - -namespace silo_api { - -class EnvironmentVariables : public silo::config::AbstractConfigSource { - public: - static std::string prefixedUppercase(const Option& option); - - [[nodiscard]] std::string configType() const override; - - [[nodiscard]] bool hasProperty(const Option& option) const override; - - [[nodiscard]] std::optional getString(const Option& option) const override; -}; - -} // namespace silo_api diff --git a/src/config/backend/command_line_arguments.cpp b/src/config/backend/command_line_arguments.cpp new file mode 100644 index 000000000..0ad52fc25 --- /dev/null +++ b/src/config/backend/command_line_arguments.cpp @@ -0,0 +1,111 @@ +#include "config/backend/command_line_arguments.h" + +#include 
+ +#include +#include +#include +#include + +#include "silo/common/panic.h" +#include "silo/config/util/config_exception.h" + +namespace silo::config { + +std::string CommandLineArguments::configKeyPathToString(const ConfigKeyPath& key_path) { + std::vector result; + for (const auto& sublevel : key_path.path) { + for (const std::string& current_string : sublevel) { + result.push_back(current_string); + } + } + return "--" + boost::join(result, "-"); +} + +AmbiguousConfigKeyPath CommandLineArguments::stringToConfigKeyPath( + const std::string& command_line_argument +) { + if (command_line_argument.empty() || command_line_argument[0] != '-') { + throw std::invalid_argument("Invalid Unix option string"); + } + + AmbiguousConfigKeyPath config_key_path; + // Remove the leading dash(es) and split by '-' + std::string trimmed = command_line_argument.substr(1); // Skip the first '-' + std::vector tokens; + + boost::split(tokens, trimmed, boost::is_any_of("-")); + + // Here, for simplicity, treat each token as its own sublevel + for (const auto& token : tokens) { + if (!token.empty()) { + config_key_path.path.push_back(token); + } + } + + return config_key_path; +} + +VerifiedConfigSource CommandLineArguments::verify(const ConfigSpecification& config_struct) const { + // Parse the command line, now that we have the option keys + // and the info about whether they take an argument (any that + // are not of type bool). + + // E.g. 
"--api-foo" => "1234" + std::unordered_map config_value_by_option; + std::vector positional_args; + std::vector invalid_config_keys; + + for (size_t i = 0; i < args.size(); ++i) { + const std::string& arg = args[i]; + if (arg.starts_with('-')) { + if (arg == "--") { + for (size_t j = i + 1; j < args.size(); ++j) { + positional_args.push_back(args[j]); + } + break; + } + const AmbiguousConfigKeyPath ambiguous_key = stringToConfigKeyPath(arg); + if (auto value_specification_opt = config_struct.getValueSpecificationFromAmbiguousKey(ambiguous_key)) { + auto value_specification = value_specification_opt.value(); + std::string value_string; + if (value_specification.type == ConfigValueType::BOOL) { + value_string = "1"; + } else { + ++i; + if (i == args.size()) { + // VerificationError::ParseError in Rust + throw silo::config::ConfigException("missing argument after option " + arg); + } + value_string = args[i]; + } + ConfigValue value = value_specification.getValueFromString(value_string); + // Overwrite value with the last occurrence + // (i.e. `silo --foo 4 --foo 5` will leave "--foo" + // => "5" in the map); emplace() would keep the first. + config_value_by_option.insert_or_assign(value_specification.key, value); + } else { + invalid_config_keys.push_back(arg); + } + } else { + positional_args.push_back(arg); + } + } + + if (!invalid_config_keys.empty()) { + const char* keys_or_options = (invalid_config_keys.size() >= 2) ? "options" : "option"; + throw silo::config::ConfigException(fmt::format( + "in {}: unknown {} {}", + errorContext(), + keys_or_options, + boost::join(invalid_config_keys, ", ") + )); + } + + // Need to specify VerifiedCommandLineArguments { } because the + // constructor is private and std::make_unique foils the friend + // relationship. 
+ return VerifiedConfigSource{std::move(config_value_by_option)}; +} + +} // namespace silo::config diff --git a/src/config/backend/environment_variables.cpp b/src/config/backend/environment_variables.cpp new file mode 100644 index 000000000..f2c861988 --- /dev/null +++ b/src/config/backend/environment_variables.cpp @@ -0,0 +1,120 @@ +#include "config/backend/environment_variables.h" + +#include + +#include +#include + +#include "silo/common/alist.h" + +constexpr std::string_view env_var_prefix = "SILO_"; + +namespace { + +std::string toLowerCase(std::string input) { // TODO look for other usages of the same thing + std::string result; + // `result` starts empty, so append through back_inserter instead of writing via begin(). + std::ranges::transform(input, std::back_inserter(result), [](unsigned char c) { return static_cast(std::tolower(c)); }); + return result; +} + +} // namespace + +namespace silo::config { + +EnvironmentVariables EnvironmentVariables::decodeEnvironmentVariables(const char* const* envp) { + std::vector> alist; + for (const char* const* current_envp = envp; *current_envp != nullptr; current_envp++) { + const char* env = *current_envp; + for (size_t i = 0; env[i] != 0; i++) { + if (env[i] == '=') { + std::string key{env, i}; + if (key.starts_with(env_var_prefix)) { + std::string val{env + i + 1}; + alist.emplace_back(key, val); + } + // Only the first '=' separates key from value; stop so a value containing '=' is not re-split. + break; + } + } + } + return EnvironmentVariables{std::move(alist)}; +} + +[[nodiscard]] std::string EnvironmentVariables::configKeyPathToString( + const ConfigKeyPath& config_key_path +) { + std::vector result; + for (const auto& sublevel : config_key_path.path) { + for (const std::string& current_string : sublevel) { + std::string current_string_all_uppercase; + std::ranges::transform( + current_string, + std::back_inserter(current_string_all_uppercase), + [](unsigned char c) { return std::toupper(c); } + ); + result.push_back(current_string_all_uppercase); + } + } + return fmt::format("{}{}", env_var_prefix, boost::join(result, "_")); +} + +AmbiguousConfigKeyPath EnvironmentVariables::stringToConfigKeyPath( + const std::string& key_path_string +) { + std::vector result; + +
// Ensure the prefix exists + if (key_path_string.rfind(env_var_prefix, 0) != 0) { + throw std::invalid_argument("String does not start with the expected prefix."); + } + + // Remove the prefix + std::string trimmed = key_path_string.substr(env_var_prefix.size()); + + // Split by '_' + std::stringstream ss(trimmed); + std::string token; + while (std::getline(ss, token, '_')) { + result.push_back(toLowerCase(token)); + } + + return {result}; +} + +[[nodiscard]] VerifiedConfigSource EnvironmentVariables::verify( + const ConfigSpecification& config_specification +) const { + std::unordered_map config_values; + std::vector invalid_config_keys; + for (const auto& [key_string, value_string] : alist) { + auto ambiguous_key = EnvironmentVariables::stringToConfigKeyPath(key_string); + auto value_specification_opt = + config_specification.getValueSpecificationFromAmbiguousKey(ambiguous_key); + if (value_specification_opt.has_value()) { + auto value_specification = value_specification_opt.value(); + ConfigValue value = value_specification.getValueFromString(value_string); + config_values.emplace(value_specification.key, value); + } else { + if (key_string == "SILO_PANIC") { + SPDLOG_TRACE( + "allowing env variable {} which is independent of the config system", key_string + ); + } else { + invalid_config_keys.push_back(key_string); + } + } + } + + if (!invalid_config_keys.empty()) { + const std::string_view keys_or_options = + (invalid_config_keys.size() >= 2) ? 
"variables" : "variable"; + throw silo::config::ConfigException(fmt::format( + "in {}: unknown {} {}", + errorContext(), + keys_or_options, + boost::join(invalid_config_keys, ", ") + )); + } + + return VerifiedConfigSource(config_values); +} + +} // namespace silo::config diff --git a/src/config/backend/yaml_file.cpp b/src/config/backend/yaml_file.cpp new file mode 100644 index 000000000..23381a650 --- /dev/null +++ b/src/config/backend/yaml_file.cpp @@ -0,0 +1,235 @@ +#include "config/backend/yaml_file.h" + +#include +#include +#include + +#include +#include + +#include "silo/common/alist.h" +#include "silo/common/fmt_formatters.h" +#include "silo/config/util/config_exception.h" + +using silo::config::ConfigKeyPath; + +namespace { + +// Only valid if `isProperSingularValue(node) == true`. +std::string stringFromYaml(const YAML::Node& node) { // TODO check unused + return node.as(); +} + +bool isProperSingularValue(const YAML::Node& node) { + if (node.IsMap()) { + SPDLOG_TRACE("isProperSingularValue = false, node is a map"); + return false; + } + if (!node.IsDefined()) { + SPDLOG_TRACE("isProperSingularValue = false, node is not defined"); + return false; + } + if (!node.IsScalar()) { + SPDLOG_TRACE("isProperSingularValue = false, node is not a scalar"); + return false; + } + return true; +} + +std::vector splitByDot(const std::string& str) { + std::vector result; + std::stringstream ss(str); + std::string token; + + while (std::getline(ss, token, '.')) { + result.push_back(token); + } + + return result; +} + +std::vector splitCamelCase(const std::string& camelCaseString) { + std::vector result; + std::string current; + + for (char ch : camelCaseString) { + if (std::isupper(ch)) { + // If current is not empty, push it to the result + if (!current.empty()) { + result.push_back(current); + current.clear(); + } + // Add the lowercase version of the uppercase char as the start of a new substring + current += std::tolower(ch); + } else { + // Append lowercase or 
non-uppercase char to current + current += ch; + } + } + // Push the last accumulated string to result + if (!current.empty()) { + result.push_back(current); + } + + return result; +} + +std::string joinCamelCase(const std::vector& words) { + std::string camelCaseString; + + for (size_t i = 0; i < words.size(); ++i) { + if (i == 0) { + // Add the first word as is (lowercase) + camelCaseString += words[i]; + } else { + // Capitalize the first character of subsequent words and append them + std::string word = words[i]; + if (!word.empty()) { + word[0] = std::toupper(word[0]); + camelCaseString += word; + } + } + } + + return camelCaseString; +} + +void yamlToPaths( + const std::string& config_context, + const YAML::Node& node, + const ConsList>& parents, + std::unordered_map& paths +) { + if (node.IsMap()) { + for (const auto& key_value : node) { + const auto key = key_value.first.as(); + // ^ XX what if key is not a string? + const auto parents2 = parents.cons(splitCamelCase(key)); + const auto child_node = key_value.second; + yamlToPaths(config_context, child_node, parents2, paths); + } + } else { + ConfigKeyPath path = ConfigKeyPath::from(parents.toVecReverse()); + if (isProperSingularValue(node)) { + paths.emplace(path, node); + } else { + throw silo::config::ConfigException(fmt::format( + "{}: found non-usable leaf value at nesting {}", config_context, path.toDebugString() + )); + } + } +} + +} // namespace + +namespace silo::config { + +std::string YamlFile::configKeyPathToString(const ConfigKeyPath& config_key_path) { + std::vector camelCaseStrings; + for (const auto& list : config_key_path.path) { + camelCaseStrings.emplace_back(joinCamelCase(list)); + } + return boost::join(camelCaseStrings, "."); +} + +ConfigKeyPath YamlFile::stringToConfigKeyPath(const std::string& key_path_string) { + std::vector camelCaseStrings = splitByDot(key_path_string); + std::vector> result; + std::transform( + camelCaseStrings.begin(), camelCaseStrings.end(), 
std::back_inserter(result), splitCamelCase + ); + return ConfigKeyPath::from(result); +} + +YamlFile YamlFile::fromYAML(const std::string& error_context, const std::string& yaml_string) { + try { + YAML::Node node = YAML::Load(yaml_string); + + // Collect all paths present + std::unordered_map paths; + yamlToPaths(error_context, node, ConsList>{}, paths); + + return YamlFile{error_context, paths}; + } catch (const YAML::ParserException& parser_exception) { + throw std::runtime_error( + fmt::format("{} does not contain valid YAML: {}", error_context, parser_exception.what()) + ); + } +} + +YamlFile YamlFile::readFile(const std::filesystem::path& path) { + const std::ifstream file(path, std::ios::in | std::ios::binary); + if (file.fail()) { + throw std::runtime_error(fmt::format("Could not open the YAML file: '{}'", path)); + } + + std::ostringstream contents; + contents << file.rdbuf(); + if (contents.fail()) { + throw std::runtime_error(fmt::format("Error when reading the YAML file: '{}'", path)); + } + + return fromYAML(fmt::format("file: '{}'", path.string()), contents.str()); +} + +std::string YamlFile::errorContext() const { // TODO naming consistent? 
+ return fmt::format("YAML file '{}'", error_context); +} + +namespace { +ConfigValue yamlNodeToConfigValue( + const ConfigValueSpecification& value_specification, + const YAML::Node& yaml +) { + switch (value_specification.type) { + case ConfigValueType::STRING: + return value_specification.createValue(yaml.as()); + case ConfigValueType::PATH: + return value_specification.createValue({std::filesystem::path{yaml.as()}}); + case ConfigValueType::INT32: + return value_specification.createValue(yaml.as()); + case ConfigValueType::UINT32: + return value_specification.createValue(yaml.as()); + case ConfigValueType::UINT16: + return value_specification.createValue(yaml.as()); + case ConfigValueType::BOOL: + return value_specification.createValue(yaml.as()); + } +} +} // namespace + +VerifiedConfigSource YamlFile::verify(const ConfigSpecification& config_specification) const { + // No need to stringify and do duplicate check since + // ConfigKeyPath is actually directly representing YAML paths. + + // Check the ones given, collect erroneous ones in foo.bar syntax + std::vector invalid_config_keys; + std::unordered_map provided_config_values; + for (const auto& [key, yaml] : getYamlFields()) { + auto value_specification = config_specification.getValueSpecification(key); + if (!value_specification.has_value()) { + invalid_config_keys.push_back(configKeyPathToString(key)); + } else { + ConfigValue value = yamlNodeToConfigValue(value_specification.value(), yaml); + provided_config_values.emplace(key, value); + } + } + + if (!invalid_config_keys.empty()) { + const char* keys_or_options = (invalid_config_keys.size() >= 2) ? 
"keys" : "key"; + throw silo::config::ConfigException(fmt::format( + "in {}: unknown {} {}", + errorContext(), + keys_or_options, + boost::join(invalid_config_keys, ", ") + )); + } + + return VerifiedConfigSource{provided_config_values}; +} + +const std::unordered_map& YamlFile::getYamlFields() const { + return yaml_fields; +} + +} // namespace silo::config diff --git a/src/config/command_line_arguments.test.cpp b/src/config/command_line_arguments.test.cpp new file mode 100644 index 000000000..3ee9906d5 --- /dev/null +++ b/src/config/command_line_arguments.test.cpp @@ -0,0 +1,26 @@ +#include "config/backend/command_line_arguments.h" + +#include + +using silo::config::CommandLineArguments; +using silo::config::ConfigKeyPath; + +TEST(CommandLineArguments, correctUnixOptionString) { + ASSERT_EQ(CommandLineArguments::configKeyPathToString(ConfigKeyPath::from({{"a"}})), "--a"); + ASSERT_EQ(CommandLineArguments::configKeyPathToString(ConfigKeyPath::from({{"abc"}})), "--abc"); + ASSERT_EQ( + CommandLineArguments::configKeyPathToString(ConfigKeyPath::from({{"some", "camel", "case"}})), + "--some-camel-case" + ); + ASSERT_EQ( + CommandLineArguments::configKeyPathToString( + ConfigKeyPath::from({{"some"}, {"subsectioned", "sequence"}}) + ), + "--some-subsectioned-sequence" + ); + ASSERT_EQ( + CommandLineArguments::configKeyPathToString(ConfigKeyPath::from({{"some", "more", "sections"}} + )), + "--some-more-sections" + ); +} diff --git a/src/config/config_interface.cpp b/src/config/config_interface.cpp new file mode 100644 index 000000000..262da98bc --- /dev/null +++ b/src/config/config_interface.cpp @@ -0,0 +1,3 @@ +#include "config/config_interface.h" + +namespace silo::config {} // namespace silo::config diff --git a/src/config/config_key_path.cpp b/src/config/config_key_path.cpp new file mode 100644 index 000000000..f5b3afdc3 --- /dev/null +++ b/src/config/config_key_path.cpp @@ -0,0 +1,59 @@ +#include "config/config_key_path.h" + +#include +#include + +#include 
"config/backend/yaml_file.h" + +namespace { +bool isLowerCaseOrNumeric(char c) { + return (std::islower(c) || std::isdigit(c)); +} +} // namespace + +namespace silo::config { + +ConfigKeyPath ConfigKeyPath::from(std::vector> paths) { + for (const auto& sublevel : paths) { + for (const std::string& string : sublevel) { + if (string.empty()) { + throw std::runtime_error( + "Internal Error: tried to create ConfigKeyPath with an empty part." + ); + } + if (!std::ranges::all_of(string, isLowerCaseOrNumeric)) { + throw std::runtime_error( + "Internal Error: tried to create ConfigKeyPath of a value that is not lower-case or " + "numberic." + ); + } + } + } + ConfigKeyPath result; + result.path = paths; + return result; +} + +std::string ConfigKeyPath::toDebugString() const { + return YamlFile::configKeyPathToString(*this); +} + +AmbiguousConfigKeyPath AmbiguousConfigKeyPath::from(const ConfigKeyPath& key_path) { + std::vector flat_map; + for (const auto& sublevel : key_path.path) { + std::ranges::copy(sublevel, std::back_inserter(flat_map)); + } + return {flat_map}; +} + +} // namespace silo::config + +std::size_t std::hash::operator()( + const silo::config::ConfigKeyPath& key +) const { + std::size_t seed = 0; + for (const auto& segment : key.path) { + boost::hash_combine(seed, segment); + } + return seed; +} diff --git a/src/config/config_specification.cpp b/src/config/config_specification.cpp new file mode 100644 index 000000000..6ba716a1e --- /dev/null +++ b/src/config/config_specification.cpp @@ -0,0 +1,113 @@ +#include "config/config_specification.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "config/backend/command_line_arguments.h" +#include "config/backend/environment_variables.h" +#include "config/backend/yaml_file.h" +#include "config/config_key_path.h" +#include "silo/common/cons_list.h" +#include "silo/common/fmt_formatters.h" +#include "silo/common/panic.h" +#include "silo/common/string_utils.h" 
+ +namespace { +std::string indent(std::string_view indentation, const std::string& str) { + auto lines = silo::splitBy(str, "\n"); + std::string out{}; + for (const auto& line : lines) { + // Can't do: out.push_back(std::string { indentation }); + for (char chr : indentation) { + out.push_back(chr); + } + for (char chr : line) { + out.push_back(chr); + } + out.push_back('\n'); + } + return out; +} +} // namespace + +namespace silo::config { + +std::optional ConfigSpecification::getValueSpecificationFromAmbiguousKey( + const silo::config::AmbiguousConfigKeyPath& key +) const { + for (const auto& field : fields) { + if (key == AmbiguousConfigKeyPath::from(field.key)) { + return field; + } + } + return std::nullopt; +} + +std::optional ConfigSpecification::getValueSpecification( + const silo::config::ConfigKeyPath& key +) const { + auto it = std::find_if(fields.begin(), fields.end(), [&](const ConfigValueSpecification& x) { + return x.key.path == key.path; + }); + if (it == fields.end()) { + return std::nullopt; + } + return *it; +} + +std::string ConfigSpecification::helpText() const { + std::ostringstream help_text; + // The mode names shown here must match the literals main() compares against ('preprocessing', 'api'). + help_text << "Usage: " << program_name << " [options...]\n" + << " or: silo api|preprocessing [options...]\n" + << "\n" + << " Showing the options for " << program_name + << ". To see the options for the sister\n" + << " program, use 'silo api|preprocessing --help'.\n" + << "\n" + << " Options override environment variables which override YAML file entries.\n" + << " The following options are valid:\n"; + // ^ XX are keys with dot working in YAML? Or have to describe what is meant? + auto addln = [&help_text](const std::string& line) { help_text << line << "\n"; }; + + for (const auto& field_spec : fields) { + addln(""); + const std::string_view type_text = field_spec.type == ConfigValueType::BOOL + ? 
" (boolean, the option implies 'true')" + : configValueTypeToString(field_spec.type); + addln(fmt::format( + " {} {}", CommandLineArguments::configKeyPathToString(field_spec.key), type_text + )); + addln(fmt::format( + " Env var: {}", EnvironmentVariables::configKeyPathToString(field_spec.key) + )); + addln(fmt::format(" YAML key: {}", YamlFile::configKeyPathToString(field_spec.key))); + addln("\n" + indent(std::string_view{" "}, std::string{field_spec.help_text})); + addln( + field_spec.default_value.has_value() + ? fmt::format(" Default: {}", field_spec.default_value->toString()) + : " No default." + ); + } + + return help_text.str(); +} + +VerifiedConfigSource ConfigSpecification::getConfigSourceFromDefaults() const { + VerifiedConfigSource result; + for (auto& x : fields) { + if (x.default_value) { + result.config_values.emplace(x.key, x.default_value.value()); + } + } + return result; +} + +} // namespace silo::config diff --git a/src/config/config_value.cpp b/src/config/config_value.cpp new file mode 100644 index 000000000..88a976d61 --- /dev/null +++ b/src/config/config_value.cpp @@ -0,0 +1,96 @@ +#include "config/config_value.h" + +#include +#include + +#include "config/backend/yaml_file.h" +#include "silo/common/panic.h" + +namespace silo::config { + +ConfigValueType ConfigValue::getValueType() const { + return std::visit( + [](const auto& v) -> ConfigValueType { + using T = std::decay_t; + if constexpr (std::is_same_v) { + return ConfigValueType::STRING; + } else if constexpr (std::is_same_v) { + return ConfigValueType::PATH; + } else if constexpr (std::is_same_v) { + return ConfigValueType::INT32; + } else if constexpr (std::is_same_v) { + return ConfigValueType::UINT32; + } else if constexpr (std::is_same_v) { + return ConfigValueType::UINT16; + } else if constexpr (std::is_same_v) { + return ConfigValueType::BOOL; + } else { + SILO_UNREACHABLE(); + } + }, + value + ); +} + +std::string ConfigValue::toString() const { + return std::visit( + [](const 
auto& v) -> std::string { + using T = std::decay_t; + if constexpr (std::is_same_v) { + return fmt::format("'{}'", v); + } else if constexpr (std::is_same_v) { + return fmt::format("'{}'", v.string()); + } else if constexpr (std::is_same_v) { + return fmt::format("{}", v); + } else if constexpr (std::is_same_v) { + return fmt::format("{}", v); + } else if constexpr (std::is_same_v) { + return fmt::format("{}", v); + } else if constexpr (std::is_same_v) { + return fmt::format("{}", v); + } else { + SILO_PANIC("Unhandled variant type in ConfigValue::toString"); + } + }, + value + ); +} + +ConfigValue ConfigValueSpecification::getValueFromString(std::string value_string) const { + switch (type) { + case ConfigValueType::STRING: + return createValue(value_string); + case ConfigValueType::PATH: { + std::filesystem::path path = value_string; + return createValue(path); + } + case ConfigValueType::UINT32: { + uint32_t parsed_unsigned = boost::lexical_cast(value_string); + return createValue(parsed_unsigned); + } + case ConfigValueType::UINT16: { + uint16_t parsed_unsigned = boost::lexical_cast(value_string); + return createValue(parsed_unsigned); + } + case ConfigValueType::INT32: { + int32_t parsed_signed = boost::lexical_cast(value_string); + return createValue(parsed_signed); + } + case ConfigValueType::BOOL: + return createValue(true); + } +} + +ConfigValue ConfigValueSpecification::createValue( + std::variant value +) const { + ConfigValue created_value{value}; + if (created_value.getValueType() != type) { + throw std::runtime_error( + "Internal Error: value created for this specification that is of the wrong type." 
+ ); + } // TODO change to SILO_ASSERT + return created_value; +} + +} // namespace silo::config diff --git a/src/config/environment_variables.test.cpp b/src/config/environment_variables.test.cpp new file mode 100644 index 000000000..20a81cd06 --- /dev/null +++ b/src/config/environment_variables.test.cpp @@ -0,0 +1,30 @@ +#include "config/backend/environment_variables.h" + +#include +#include + +using silo::config::ConfigKeyPath; +using silo::config::EnvironmentVariables; + +TEST(EnvironmentVariables, correctPrefixedUppercase) { + ASSERT_EQ(EnvironmentVariables::configKeyPathToString(ConfigKeyPath::from({{"a"}})), "SILO_A"); + ASSERT_EQ( + EnvironmentVariables::configKeyPathToString(ConfigKeyPath::from({{"abc"}})), "SILO_ABC" + ); + ASSERT_EQ( + EnvironmentVariables::configKeyPathToString(ConfigKeyPath::from({{"some", "snake", "case"}})), + "SILO_SOME_SNAKE_CASE" + ); + ASSERT_EQ( + EnvironmentVariables::configKeyPathToString( + ConfigKeyPath::from({{"some"}, {"subsectioned", "sequence"}}) + ), + "SILO_SOME_SUBSECTIONED_SEQUENCE" + ); + ASSERT_EQ( + EnvironmentVariables::configKeyPathToString( + ConfigKeyPath::from({{"some"}, {"more"}, {"sections"}}) + ), + "SILO_SOME_MORE_SECTIONS" + ); +} \ No newline at end of file diff --git a/src/config/verified_config_source.cpp b/src/config/verified_config_source.cpp new file mode 100644 index 000000000..b8b10f7a9 --- /dev/null +++ b/src/config/verified_config_source.cpp @@ -0,0 +1,114 @@ +#include "config/verified_config_source.h" + +#include "silo/common/panic.h" + +namespace silo::config { + +std::optional VerifiedConfigSource::getString(const ConfigKeyPath& config_key_path +) const { + auto it = config_values.find(config_key_path); + if (it != config_values.end()) { + const ConfigValue& value = it->second; + if (value.getValueType() != ConfigValueType::STRING) { + SILO_PANIC( + "Called getString called on a ConfigKeyPath ('{}') that belongs to a value of another " + "type ({}).", + config_key_path.toDebugString(), + 
configValueTypeToString(value.getValueType()) + ); + } + return get(value.value); + } + return std::nullopt; +} + +std::optional VerifiedConfigSource::getPath( + const ConfigKeyPath& config_key_path +) const { + auto it = config_values.find(config_key_path); + if (it != config_values.end()) { + const ConfigValue& value = it->second; + if (value.getValueType() != ConfigValueType::PATH) { + SILO_PANIC( + "Called getPath called on a ConfigKeyPath ('{}') that belongs to a value of another " + "type ({}).", + config_key_path.toDebugString(), + configValueTypeToString(value.getValueType()) + ); + } + return get(value.value); + } + return std::nullopt; +} + +std::optional VerifiedConfigSource::getInt32(const ConfigKeyPath& config_key_path) const { + auto it = config_values.find(config_key_path); + if (it != config_values.end()) { + const ConfigValue& value = it->second; + if (value.getValueType() != ConfigValueType::INT32) { + SILO_PANIC( + "Called getInt32 called on a ConfigKeyPath ('{}') that belongs to a value of another " + "type ({}).", + config_key_path.toDebugString(), + configValueTypeToString(value.getValueType()) + ); + } + return get(value.value); + } + return std::nullopt; +} + +std::optional VerifiedConfigSource::getUint32(const ConfigKeyPath& config_key_path +) const { + auto it = config_values.find(config_key_path); + if (it != config_values.end()) { + const ConfigValue& value = it->second; + if (value.getValueType() != ConfigValueType::UINT32) { + SILO_PANIC( + "Called getUint32 called on a ConfigKeyPath ('{}') that belongs to a value of another " + "type ({}).", + config_key_path.toDebugString(), + configValueTypeToString(value.getValueType()) + ); + } + return get(value.value); + } + return std::nullopt; +} + +std::optional VerifiedConfigSource::getUint16(const ConfigKeyPath& config_key_path +) const { + auto it = config_values.find(config_key_path); + if (it != config_values.end()) { + const ConfigValue& value = it->second; + if (value.getValueType() != 
ConfigValueType::UINT16) { + SILO_PANIC( + "Called getUint16 called on a ConfigKeyPath ('{}') that belongs to a value of another " + "type ({}).", + config_key_path.toDebugString(), + configValueTypeToString(value.getValueType()) + ); + } + return get(value.value); + } + return std::nullopt; +} + +std::optional VerifiedConfigSource::getBool(const ConfigKeyPath& config_key_path) const { + auto it = config_values.find(config_key_path); + if (it != config_values.end()) { + const ConfigValue& value = it->second; + if (value.getValueType() != ConfigValueType::BOOL) { + SILO_PANIC( + "Called getBool called on a ConfigKeyPath ('{}') that belongs to a value of another " + "type ({}).", + config_key_path.toDebugString(), + configValueTypeToString(value.getValueType()) + ); + } + return get(value.value); + } + return std::nullopt; +} + +} // namespace silo::config diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 000000000..09f82bcb9 --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,111 @@ +#include +#include +#include + +#include + +#include "silo/common/overloaded.h" +#include "silo/config/preprocessing_config.h" +#include "silo/config/runtime_config.h" +#include "silo/config/util/config_repository.h" +#include "silo/database.h" +#include "silo/preprocessing/preprocessor.h" +#include "silo_api/api.h" +#include "silo_api/logging.h" + +/// Does not throw exceptions +static int runPreprocessor(const silo::config::PreprocessingConfig& preprocessing_config) { + auto database_config = silo::config::ConfigRepository().getValidatedConfig( + preprocessing_config.getDatabaseConfigFilename() + ); + + SPDLOG_INFO("preprocessing - reading reference genome"); + const auto reference_genomes = + silo::ReferenceGenomes::readFromFile(preprocessing_config.getReferenceGenomeFilename()); + + silo::common::LineageTreeAndIdMap lineage_definitions; + if (auto lineage_file_name = preprocessing_config.getLineageDefinitionsFilename()) { + SPDLOG_INFO( + "preprocessing - read and verify 
the lineage tree '{}'", lineage_file_name.value().string() + ); + lineage_definitions = + silo::common::LineageTreeAndIdMap::fromLineageDefinitionFilePath(lineage_file_name.value() + ); + } + + auto preprocessor = silo::preprocessing::Preprocessor( + preprocessing_config, database_config, reference_genomes, std::move(lineage_definitions) + ); + auto database = preprocessor.preprocess(); + + database.saveDatabaseState(preprocessing_config.output_directory); + return 0; +} + +static int runApi(const silo::config::RuntimeConfig& runtime_config) { + SiloServer server; + return server.runApi(runtime_config); +} + +enum class ExecutionMode { PREPROCESSING, API }; + +int main(int argc, char** argv) { + setupLogger(); + + std::vector all_args(argv, argv + argc); + + std::filesystem::path program_path{all_args[0]}; + + std::string program_name = program_path.filename(); + + std::span args(all_args.begin() + 1, all_args.end()); + + ExecutionMode mode; + if (program_name == "siloPreprocessor") { + mode = ExecutionMode::PREPROCESSING; + } else if (program_name == "siloServer") { + mode = ExecutionMode::API; + } else if (!args.empty()) { + const std::string& mode_argument = args[0]; + args = {args.begin() + 1, args.end()}; + if (mode_argument == "preprocessing") { + mode = ExecutionMode::PREPROCESSING; + } else if (mode_argument == "api") { + mode = ExecutionMode::API; + } else { + std::cerr << program_name + << ": need either 'preprocessing' or 'api' as the first program argument, got '" + << mode_argument << "'\n"; + return 1; + } + } else { + std::cerr << program_name + << ": need either 'preprocessing' or 'api' as the first program argument\n"; + return 1; + } + + switch (mode) { + case ExecutionMode::PREPROCESSING: + return std::visit( + overloaded{ + [&](const silo::config::PreprocessingConfig& preprocessing_config) { + SPDLOG_TRACE("preprocessing_config = {}", preprocessing_config); + return runPreprocessor(preprocessing_config); + }, + [&](int32_t exit_code) { return 
exit_code; } + }, + silo::config::getConfig(args) + ); + case ExecutionMode::API: + return std::visit( + overloaded{ + [&](const silo::config::RuntimeConfig& runtime_config) { + SPDLOG_TRACE("runtime_config = {}", runtime_config); + return runApi(runtime_config); + }, + [&](int32_t exit_code) { return exit_code; } + }, + silo::config::getConfig(args) + ); + } +} diff --git a/src/silo/common/fmt_formatters.cpp b/src/silo/common/fmt_formatters.cpp new file mode 100644 index 000000000..65e019e65 --- /dev/null +++ b/src/silo/common/fmt_formatters.cpp @@ -0,0 +1,30 @@ +#include "silo/common/fmt_formatters.h" + +namespace silo::common { + +std::string toIsoString( + const std::chrono::time_point& time_point +) { + auto duration_since_epoch = time_point.time_since_epoch(); + + // Convert the time_point to system time (std::time_t) + auto seconds_since_epoch = + std::chrono::duration_cast(duration_since_epoch); + std::time_t time = seconds_since_epoch.count(); + + // Get the nanoseconds part + auto nanoseconds = + std::chrono::duration_cast(duration_since_epoch) % 1'000'000'000; + + // Convert to UTC time (std::tm) + std::tm utime = *std::gmtime(&time); + + // Create an ISO 8601 string with nanoseconds precision + std::ostringstream oss; + oss << std::put_time(&utime, "%Y-%m-%dT%H:%M:%S"); + oss << '.' 
<< std::setfill('0') << std::setw(9) << nanoseconds.count() + << 'Z'; // Appending 'Z' for UTC time + + return oss.str(); +} +} // namespace silo::common diff --git a/src/silo/common/type_name.cpp b/src/silo/common/type_name.cpp new file mode 100644 index 000000000..fe9b6b6ca --- /dev/null +++ b/src/silo/common/type_name.cpp @@ -0,0 +1,57 @@ +#include "silo/common/type_name.h" + +#include + +// Specializations + +namespace silo::common { + +template <> +std::string typeName() { + return "string"; +} + +template <> +std::string typeName() { + return "path"; +} + +template <> +std::string typeName() { + return "bool"; +} + +template <> +std::string typeName() { + return "u32"; +} +template <> +std::string typeName() { + return "i32"; +} +template <> +std::string typeName() { + return "u64"; +} +template <> +std::string typeName() { + return "i64"; +} +template <> +std::string typeName() { + return "u16"; +} +template <> +std::string typeName() { + return "i16"; +} +template <> +std::string typeName() { + return "u8"; +} +template <> +std::string typeName() { + return "i8"; +} + +} // namespace silo::common diff --git a/src/silo/config/preprocessing_config.cpp b/src/silo/config/preprocessing_config.cpp index f55d9ba54..51f795f28 100644 --- a/src/silo/config/preprocessing_config.cpp +++ b/src/silo/config/preprocessing_config.cpp @@ -5,41 +5,122 @@ #include -#include "silo/config/util/abstract_config_source.h" +#include "silo/common/fmt_formatters.h" #include "silo/preprocessing/preprocessing_exception.h" namespace silo::config { +const ConfigKeyPath HELP_OPTION_KEY = YamlFile::stringToConfigKeyPath("help"); +const ConfigKeyPath PREPROCESSING_CONFIG_OPTION_KEY = + YamlFile::stringToConfigKeyPath("preprocessingConfig"); +const ConfigKeyPath INPUT_DIRECTORY_OPTION_KEY = YamlFile::stringToConfigKeyPath("inputDirectory"); +const ConfigKeyPath OUTPUT_DIRECTORY_OPTION_KEY = + YamlFile::stringToConfigKeyPath("outputDirectory"); +const ConfigKeyPath 
INTERMEDIATE_RESULTS_DIRECTORY_OPTION_KEY = + YamlFile::stringToConfigKeyPath("intermediateResultsDirectory"); +const ConfigKeyPath PREPROCESSING_DATABASE_LOCATION_OPTION_KEY = + YamlFile::stringToConfigKeyPath("preprocessingDatabaseLocation"); +const ConfigKeyPath DUCKDB_MEMORY_LIMIT_IN_G_OPTION_KEY = + YamlFile::stringToConfigKeyPath("duckdbMemoryLimitInG"); +const ConfigKeyPath LINEAGE_DEFINITIONS_FILE_OPTION_KEY = + YamlFile::stringToConfigKeyPath("lineageDefinitionsFilename"); +const ConfigKeyPath NDJSON_INPUT_FILENAME_OPTION_KEY = + YamlFile::stringToConfigKeyPath("ndjsonInputFilename"); +const ConfigKeyPath DATABASE_CONFIG_FILE_OPTION_KEY = + YamlFile::stringToConfigKeyPath("databaseConfig"); +const ConfigKeyPath REFERENCE_GENOMES_FILENAME_OPTION_KEY = + YamlFile::stringToConfigKeyPath("referenceGenomeFilename"); + +// Specification of the fields in inputs to the PreprocessingConfig struct +ConfigSpecification PreprocessingConfig::getConfigSpecification() { + return ConfigSpecification{ + .program_name = "siloPreprocessing", + .fields{ + ConfigValueSpecification::createWithoutDefault( + HELP_OPTION_KEY, ConfigValueType::BOOL, "Show help text." + ), + ConfigValueSpecification::createWithoutDefault( + PREPROCESSING_CONFIG_OPTION_KEY, + ConfigValueType::PATH, + "Path to a preprocessing config that should be read before overwriting its values " + "with environment variables and other CLI arguments." 
+ ), + ConfigValueSpecification::createWithDefault( + INPUT_DIRECTORY_OPTION_KEY, + ConfigValue::fromPath("./"), + "the path to the directory with the input files" + ), + ConfigValueSpecification::createWithDefault( + OUTPUT_DIRECTORY_OPTION_KEY, + ConfigValue::fromPath(DEFAULT_OUTPUT_DIRECTORY), + "the path to the directory to hold the output files" + ), + ConfigValueSpecification::createWithDefault( + INTERMEDIATE_RESULTS_DIRECTORY_OPTION_KEY, + ConfigValue::fromPath("./temp/"), + "the path to the directory to hold temporary files" + ), + ConfigValueSpecification::createWithoutDefault( + PREPROCESSING_DATABASE_LOCATION_OPTION_KEY, + ConfigValueType::PATH, + "the file where the duckdb database will be stored, which is used during preprocessing" + ), + ConfigValueSpecification::createWithoutDefault( + DUCKDB_MEMORY_LIMIT_IN_G_OPTION_KEY, + ConfigValueType::UINT32, + "DuckDB memory limit in GB" + ), + ConfigValueSpecification::createWithoutDefault( + LINEAGE_DEFINITIONS_FILE_OPTION_KEY, + ConfigValueType::PATH, + "file name of the file holding the lineage definitions" + ), + ConfigValueSpecification::createWithoutDefault( + NDJSON_INPUT_FILENAME_OPTION_KEY, + ConfigValueType::PATH, + "file name of the file holding NDJSON input" + ), + ConfigValueSpecification::createWithDefault( + DATABASE_CONFIG_FILE_OPTION_KEY, + ConfigValue::fromPath("database_config.yaml"), + "file name of the file holding the database table configuration" + ), + ConfigValueSpecification::createWithDefault( + REFERENCE_GENOMES_FILENAME_OPTION_KEY, + ConfigValue::fromPath("reference_genomes.json"), + "file name of the file holding the reference genome" + ), + } + }; +} + +PreprocessingConfig::PreprocessingConfig() { + overwriteFrom(getConfigSpecification().getConfigSourceFromDefaults()); +} + void PreprocessingConfig::validate() const { if (!std::filesystem::exists(input_directory)) { throw preprocessing::PreprocessingException(input_directory.string() + " does not exist"); } if 
(!ndjson_input_filename.has_value()) { throw preprocessing::PreprocessingException(fmt::format( - "{} must be specified as preprocessing option.", NDJSON_INPUT_FILENAME_OPTION.toCamelCase() + "{} must be specified as preprocessing option.", + NDJSON_INPUT_FILENAME_OPTION_KEY.toDebugString() )); } } -std::filesystem::path PreprocessingConfig::getOutputDirectory() const { - return output_directory; -} - -std::filesystem::path PreprocessingConfig::getIntermediateResultsDirectory() const { - return intermediate_results_directory; -} - -std::optional PreprocessingConfig::getPreprocessingDatabaseLocation() const { - return preprocessing_database_location; +uint32_t PreprocessingConfig::getDuckdbMemoryLimitInG() const { + return duckdb_memory_limit_in_g; } -[[nodiscard]] std::optional PreprocessingConfig::getDuckdbMemoryLimitInG() const { - return duckdb_memory_limit_in_g; +std::filesystem::path PreprocessingConfig::getDatabaseConfigFilename() const { + return input_directory / database_config_file; } std::optional PreprocessingConfig::getLineageDefinitionsFilename() const { return lineage_definitions_file.has_value() - ? std::optional(input_directory / *lineage_definitions_file) + ? 
std::optional(input_directory / lineage_definitions_file.value()) : std::nullopt; } @@ -53,79 +134,48 @@ std::optional PreprocessingConfig::getNdjsonInputFilename : std::nullopt; } -void PreprocessingConfig::overwrite(const silo::config::AbstractConfigSource& config) { - if (auto value = config.getString(INPUT_DIRECTORY_OPTION)) { - SPDLOG_DEBUG( - "Using {} as passed via {}: {}", - INPUT_DIRECTORY_OPTION.toString(), - config.configType(), - *value - ); - input_directory = *value; +bool PreprocessingConfig::asksForHelp() const { + return help; +} + +void PreprocessingConfig::overwriteFrom(const VerifiedConfigSource& config_source) { + if (auto var = config_source.getBool(HELP_OPTION_KEY)) { + help = var.value(); + } + if (auto var = config_source.getPath(PREPROCESSING_CONFIG_OPTION_KEY)) { + preprocessing_config = var.value(); + } + if (auto var = config_source.getPath(INPUT_DIRECTORY_OPTION_KEY)) { + input_directory = var.value(); } - if (auto value = config.getString(OUTPUT_DIRECTORY_OPTION)) { - SPDLOG_DEBUG( - "Using {} as passed via {}: {}", - OUTPUT_DIRECTORY_OPTION.toString(), - config.configType(), - *value - ); - output_directory = *value; + if (auto var = config_source.getPath(OUTPUT_DIRECTORY_OPTION_KEY)) { + output_directory = var.value(); } - if (auto value = config.getString(INTERMEDIATE_RESULTS_DIRECTORY_OPTION)) { - SPDLOG_DEBUG( - "Using {} as passed via {}: {}", - INTERMEDIATE_RESULTS_DIRECTORY_OPTION.toString(), - config.configType(), - *value - ); - intermediate_results_directory = *value; + if (auto var = config_source.getPath(INTERMEDIATE_RESULTS_DIRECTORY_OPTION_KEY)) { + intermediate_results_directory = var.value(); } - if (auto value = config.getString(PREPROCESSING_DATABASE_LOCATION_OPTION)) { - SPDLOG_DEBUG( - "Using {} as passed via {}: {}", - PREPROCESSING_DATABASE_LOCATION_OPTION.toString(), - config.configType(), - *value - ); - preprocessing_database_location = *value; + if (auto var = 
config_source.getPath(PREPROCESSING_DATABASE_LOCATION_OPTION_KEY)) { + preprocessing_database_location = var.value(); } - if (auto value = config.getUInt32(DUCKDB_MEMORY_LIMIT_OPTION)) { - SPDLOG_DEBUG( - "Using {} as passed via {}: {}", - DUCKDB_MEMORY_LIMIT_OPTION.toString(), - config.configType(), - *value - ); - duckdb_memory_limit_in_g = value; + if (auto var = config_source.getUint32(DUCKDB_MEMORY_LIMIT_IN_G_OPTION_KEY)) { + duckdb_memory_limit_in_g = var.value(); } - if (auto value = config.getString(LINEAGE_DEFINITIONS_FILENAME_OPTION)) { - SPDLOG_DEBUG( - "Using {} as passed via {}: {}", - LINEAGE_DEFINITIONS_FILENAME_OPTION.toString(), - config.configType(), - *value - ); - lineage_definitions_file = *value; + if (auto var = config_source.getPath(LINEAGE_DEFINITIONS_FILE_OPTION_KEY)) { + lineage_definitions_file = var.value(); } - if (auto value = config.getString(NDJSON_INPUT_FILENAME_OPTION)) { - SPDLOG_DEBUG( - "Using {} as passed via {}: {}", - NDJSON_INPUT_FILENAME_OPTION.toString(), - config.configType(), - *value - ); - ndjson_input_filename = *value; + if (auto var = config_source.getPath(NDJSON_INPUT_FILENAME_OPTION_KEY)) { + ndjson_input_filename = var.value(); } - if (auto value = config.getString(REFERENCE_GENOME_FILENAME_OPTION)) { - SPDLOG_DEBUG( - "Using {} as passed via {}: {}", - REFERENCE_GENOME_FILENAME_OPTION.toString(), - config.configType(), - *value - ); - reference_genome_file = *value; + if (auto var = config_source.getPath(DATABASE_CONFIG_FILE_OPTION_KEY)) { + database_config_file = var.value(); } + if (auto var = config_source.getPath(REFERENCE_GENOMES_FILENAME_OPTION_KEY)) { + reference_genome_file = var.value(); + } +} + +std::optional PreprocessingConfig::configPath() const { + return preprocessing_config; } } // namespace silo::config @@ -134,21 +184,11 @@ void PreprocessingConfig::overwrite(const silo::config::AbstractConfigSource& co const silo::config::PreprocessingConfig& preprocessing_config, fmt::format_context& ctx ) 
-> decltype(ctx.out()) { - return fmt::format_to( - ctx.out(), - "{{ input directory: '{}', lineage_definitions_file: {}, output_directory: '{}', " - "reference_genome_file: '{}', ndjson_filename: {}, preprocessing_database_location: {} }}", - preprocessing_config.input_directory.string(), - preprocessing_config.lineage_definitions_file.has_value() - ? "'" + preprocessing_config.lineage_definitions_file->string() + "'" - : "none", - preprocessing_config.output_directory.string(), - preprocessing_config.reference_genome_file.string(), - preprocessing_config.ndjson_input_filename.has_value() - ? "'" + preprocessing_config.ndjson_input_filename->string() + "'" - : "none", - preprocessing_config.preprocessing_database_location.has_value() - ? "'" + preprocessing_config.preprocessing_database_location->string() + "'" - : "none" - ); + fmt::format_to(ctx.out(), "{{\n"); + const char* perhaps_comma = " "; + + // TODO + (void)perhaps_comma; + + return fmt::format_to(ctx.out(), "}}\n"); } diff --git a/src/silo/config/preprocessing_config.test.cpp b/src/silo/config/preprocessing_config.test.cpp index cd974ed5e..8552c57da 100644 --- a/src/silo/config/preprocessing_config.test.cpp +++ b/src/silo/config/preprocessing_config.test.cpp @@ -3,7 +3,7 @@ #include #include -#include "silo/config/util/yaml_file.h" +#include "config/backend/yaml_file.h" #include "silo/preprocessing/preprocessing_exception.h" using silo::config::PreprocessingConfig; @@ -12,40 +12,35 @@ using silo::config::YamlFile; TEST(PreprocessingConfig, shouldReadConfigWithCorrectParametersAndDefaults) { PreprocessingConfig config; - ASSERT_NO_THROW(config.overwrite(YamlFile("./testBaseData/test_preprocessing_config.yaml"));); + ASSERT_NO_THROW( + config.overwriteFrom(YamlFile::readFile("./testBaseData/test_preprocessing_config.yaml") + .verify(PreprocessingConfig::getConfigSpecification())) + ); const std::string input_directory = "./testBaseData/exampleDataset/"; ASSERT_EQ(config.getNdjsonInputFilename(), 
input_directory + "input_file.ndjson"); ASSERT_EQ(config.getLineageDefinitionsFilename(), input_directory + "lineage_definitions.yaml"); } -TEST(PreprocessingConfig, shouldThrowExceptionWhenConfigFileDoesNotExist) { - PreprocessingConfig config; - EXPECT_THAT( - [&config]() { config.overwrite(YamlFile("testBaseData/does_not_exist.yaml")); }, - ThrowsMessage(::testing::HasSubstr("Failed to read preprocessing config")) - ); -} - TEST(PreprocessingConfig, shouldReadConfigWithOverriddenDefaults) { PreprocessingConfig config; - ASSERT_NO_THROW(config.overwrite( - YamlFile("./testBaseData/test_preprocessing_config_with_overridden_defaults.yaml") + ASSERT_NO_THROW(config.overwriteFrom( + YamlFile::readFile("./testBaseData/test_preprocessing_config_with_overridden_defaults.yaml") + .verify(PreprocessingConfig::getConfigSpecification()) );); const std::string input_directory = "./testBaseData/exampleDataset/"; ASSERT_EQ(config.getNdjsonInputFilename(), input_directory + "input_file.ndjson"); ASSERT_EQ(config.getLineageDefinitionsFilename(), input_directory + "lineage_definitions.yaml"); ASSERT_EQ(config.getDuckdbMemoryLimitInG(), 8); - ASSERT_EQ(config.getPreprocessingDatabaseLocation(), "preprocessing.duckdb"); + ASSERT_EQ(config.preprocessing_database_location, "preprocessing.duckdb"); - ASSERT_EQ(config.getOutputDirectory(), "./output/custom/"); + ASSERT_EQ(config.output_directory, "./output/custom/"); } TEST(PreprocessingConfig, shouldThrowErrorWhenNdjsonInputFileNameIsNotSet) { PreprocessingConfig config; - EXPECT_THAT( [&config]() { config.validate(); }, ThrowsMessage( diff --git a/src/silo/config/runtime_config.cpp b/src/silo/config/runtime_config.cpp index 7b1ec569e..a81f36aee 100644 --- a/src/silo/config/runtime_config.cpp +++ b/src/silo/config/runtime_config.cpp @@ -5,49 +5,148 @@ #include -#include "silo/config/util/abstract_config_source.h" +#include "config/backend/yaml_file.h" +#include "silo/common/fmt_formatters.h" namespace silo::config { -void 
RuntimeConfig::overwrite(const silo::config::AbstractConfigSource& config) { - if (auto value = config.getString(DATA_DIRECTORY_OPTION)) { - SPDLOG_DEBUG("Using dataDirectory passed via {}: {}", config.configType(), *value); - api_options.data_directory = *value; +const ConfigKeyPath HELP_OPTION_KEY = YamlFile::stringToConfigKeyPath("help"); +const ConfigKeyPath RUNTIME_CONFIG_OPTION_KEY = YamlFile::stringToConfigKeyPath("runtimeConfig"); +const ConfigKeyPath DATA_DIRECTORY_OPTION_KEY = YamlFile::stringToConfigKeyPath("dataDirectory"); +const ConfigKeyPath API_PORT_OPTION_KEY = YamlFile::stringToConfigKeyPath("api.port"); +const ConfigKeyPath API_MAX_CONNECTIONS_OPTION_KEY = + YamlFile::stringToConfigKeyPath("api.maxQueuedHttpConnections"); +const ConfigKeyPath API_PARALLEL_THREADS_OPTION_KEY = + YamlFile::stringToConfigKeyPath("api.threadsForHttpConnections"); +const ConfigKeyPath API_ESTIMATED_STARTUP_TIME_OPTION_KEY = + YamlFile::stringToConfigKeyPath("api.estimatedStartupTimeInMinutes"); +const ConfigKeyPath QUERY_MATERIALIZATION_CUTOFF_OPTION_KEY = + YamlFile::stringToConfigKeyPath("query.materializationCutoff"); + +ConfigSpecification RuntimeConfig::getConfigSpecification() { + return { + .program_name = "siloServer", + .fields = + { + ConfigValueSpecification::createWithoutDefault( + HELP_OPTION_KEY, ConfigValueType::BOOL, "Show help text." + ), + ConfigValueSpecification::createWithoutDefault( + RUNTIME_CONFIG_OPTION_KEY, + ConfigValueType::PATH, + "Path to config file in YAML format." + ), + ConfigValueSpecification::createWithDefault( + DATA_DIRECTORY_OPTION_KEY, + ConfigValue::fromPath(DEFAULT_OUTPUT_DIRECTORY), + "The path to the directory with the data files (output from preprocessing)." + ), + ConfigValueSpecification::createWithDefault( + API_MAX_CONNECTIONS_OPTION_KEY, + ConfigValue::fromUint32(64), + "The maximum number of concurrent connections accepted at any time." 
+ ), + ConfigValueSpecification::createWithDefault( + API_PARALLEL_THREADS_OPTION_KEY, + ConfigValue::fromUint32(4), + "The number of worker threads." + ), + ConfigValueSpecification::createWithDefault( + API_PORT_OPTION_KEY, + ConfigValue::fromUint16(8081), + "The port number on which to listen for incoming HTTP connections." + ), + ConfigValueSpecification::createWithoutDefault( + API_ESTIMATED_STARTUP_TIME_OPTION_KEY, + ConfigValueType::UINT32, + "Estimated time in minutes that the initial loading of the database takes. \n" + "As long as no database is loaded yet, SILO will throw a 503 error. \n" + "This option allows SILO to compute a Retry-After header for the 503 response." + ), + ConfigValueSpecification::createWithDefault( + QUERY_MATERIALIZATION_CUTOFF_OPTION_KEY, + ConfigValue::fromUint32(10000), + "Above how many records in a result set the result rows are to be constructed\n" + "lazily (by streaming)." + ), + } + }; +} + +RuntimeConfig::RuntimeConfig() { + overwriteFrom(getConfigSpecification().getConfigSourceFromDefaults()); +} + +bool RuntimeConfig::asksForHelp() const { + return help; +} + +std::optional RuntimeConfig::configPath() const { + return runtime_config; +} + +void RuntimeConfig::overwriteFrom(const VerifiedConfigSource& config_source) { + if (auto var = config_source.getBool(HELP_OPTION_KEY)) { + help = var.value(); + } + if (auto var = config_source.getPath(RUNTIME_CONFIG_OPTION_KEY)) { + runtime_config = var.value(); } - if (auto value = config.getInt32(MAX_CONNECTIONS_OPTION)) { - SPDLOG_DEBUG( - "Using {} passed via {}: {}", - MAX_CONNECTIONS_OPTION.toString(), - config.configType(), - *value - ); - api_options.max_connections = *value; + if (auto var = config_source.getPath(DATA_DIRECTORY_OPTION_KEY)) { + data_directory = var.value(); } - if (auto value = config.getInt32(PARALLEL_THREADS_OPTION)) { - SPDLOG_DEBUG( - "Using {} as passed via {}: {}", - PARALLEL_THREADS_OPTION.toString(), - config.configType(), - *value - ); - 
api_options.parallel_threads = *value; + if (auto var = config_source.getInt32(API_MAX_CONNECTIONS_OPTION_KEY)) { + api_options.max_connections = var.value(); } - if (auto value = config.getUInt32(PORT_OPTION)) { - SPDLOG_DEBUG( - "Using {} passed via {}: {}", PORT_OPTION.toString(), config.configType(), *value - ); - api_options.port = *value; + if (auto var = config_source.getInt32(API_PARALLEL_THREADS_OPTION_KEY)) { + api_options.parallel_threads = var.value(); } - if (auto value = config.getInt32(ESTIMATED_STARTUP_TIME_IN_MINUTES_OPTION)) { - SPDLOG_DEBUG( - "Using {} as passed via {}: {}", - ESTIMATED_STARTUP_TIME_IN_MINUTES_OPTION.toString(), - config.configType(), - *value - ); - const std::chrono::minutes minutes = std::chrono::minutes(*value); + if (auto var = config_source.getUint16(API_PORT_OPTION_KEY)) { + api_options.port = var.value(); + } + // But estimated_startup_end is a derived value: + if (auto var = config_source.getUint32(API_ESTIMATED_STARTUP_TIME_OPTION_KEY)) { + const std::chrono::minutes minutes = std::chrono::minutes(var.value()); api_options.estimated_startup_end = std::chrono::system_clock::now() + minutes; } } } // namespace silo::config + +[[maybe_unused]] auto fmt::formatter::format( + const silo::config::RuntimeConfig& runtime_config, + fmt::format_context& ctx +) -> decltype(ctx.out()) { + fmt::format_to(ctx.out(), "{{{{\n"); + const char* perhaps_comma = " "; + + fmt::format_to( + ctx.out(), "{} {}: '{}'", perhaps_comma, "data_directory", runtime_config.data_directory + ); + +#define CODE_FOR_FIELD(TOPLEVEL_FIELD, FIELD_NAME) \ + fmt::format_to( \ + ctx.out(), \ + "{} {}: '{}'", \ + perhaps_comma, \ + #FIELD_NAME, \ + runtime_config.TOPLEVEL_FIELD.FIELD_NAME \ + ); \ + perhaps_comma = ","; + + // struct ApiOptions + CODE_FOR_FIELD(api_options, max_connections); + CODE_FOR_FIELD(api_options, parallel_threads); + CODE_FOR_FIELD(api_options, port); + CODE_FOR_FIELD(api_options, estimated_startup_end); + + fmt::format_to(ctx.out(), 
"}}, {{\n"); + perhaps_comma = " "; + + // struct QueryOptions + CODE_FOR_FIELD(query_options, materialization_cutoff); + +#undef CODE_FOR_FIELD + + return fmt::format_to(ctx.out(), "}}}}\n"); +} diff --git a/src/silo/config/runtime_config.test.cpp b/src/silo/config/runtime_config.test.cpp index 4516062ab..7e334adb3 100644 --- a/src/silo/config/runtime_config.test.cpp +++ b/src/silo/config/runtime_config.test.cpp @@ -2,11 +2,19 @@ #include -#include "silo/config/util/yaml_file.h" +#include "config/backend/yaml_file.h" + +using silo::config::RuntimeConfig; +using silo::config::YamlFile; TEST(RuntimeConfig, shouldReadConfig) { - silo::config::RuntimeConfig runtime_config; - runtime_config.overwrite(silo::config::YamlFile("./testBaseData/test_runtime_config.yaml")); + RuntimeConfig runtime_config; + + auto source = YamlFile::readFile("./testBaseData/test_runtime_config.yaml") + .verify(RuntimeConfig::getConfigSpecification()); + + runtime_config.overwriteFrom(source); - ASSERT_EQ(runtime_config.api_options.data_directory, std::filesystem::path("test/directory")); + ASSERT_EQ(runtime_config.api_options.port, 1234); + ASSERT_EQ(runtime_config.data_directory, "test/directory"); } diff --git a/src/silo/config/util/abstract_config_source.cpp b/src/silo/config/util/abstract_config_source.cpp index 2f589e81a..e69de29bb 100644 --- a/src/silo/config/util/abstract_config_source.cpp +++ b/src/silo/config/util/abstract_config_source.cpp @@ -1,59 +0,0 @@ -#include "silo/config/util/abstract_config_source.h" - -#include -#include -#include -#include -#include - -#include "silo/config/util/config_exception.h" - -namespace silo::config { - -std::string AbstractConfigSource::Option::toString() const { - return boost::join(access_path, "."); -} - -std::string AbstractConfigSource::Option::toCamelCase() const { - return boost::join(access_path, ""); -} - -std::optional AbstractConfigSource::getInt32(const Option& option) const { - const auto string_value = getString(option); - if 
(string_value == std::nullopt) { - return std::nullopt; - } - try { - return boost::lexical_cast(*string_value); - } catch (boost::bad_lexical_cast&) { - const std::string error_message = fmt::format( - "Could not cast the value '{}' from the {} option '{}' to a 32-bit signed integer.", - *string_value, - configType(), - option.toString() - ); - SPDLOG_ERROR(error_message); - throw ConfigException(error_message); - } -} - -std::optional AbstractConfigSource::getUInt32(const Option& option) const { - const auto string_value = getString(option); - if (string_value == std::nullopt) { - return std::nullopt; - } - try { - return boost::lexical_cast(*string_value); - } catch (boost::bad_lexical_cast&) { - const std::string error_message = fmt::format( - "Could not cast the value '{}' from the {} option '{}' to a 32-bit unsigned integer.", - *string_value, - configType(), - option.toString() - ); - SPDLOG_ERROR(error_message); - throw ConfigException(error_message); - } -} - -} // namespace silo::config diff --git a/src/silo/config/util/yaml_file.cpp b/src/silo/config/util/yaml_file.cpp deleted file mode 100644 index f27e81fe9..000000000 --- a/src/silo/config/util/yaml_file.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#include "silo/config/util/yaml_file.h" - -#include -#include -#include - -using silo::config::YamlFile; - -YamlFile::YamlFile(const std::filesystem::path& filename) - : filename(filename) { - SPDLOG_INFO("Reading config from {}", filename.string()); - try { - node = YAML::LoadFile(filename.string()); - } catch (const YAML::Exception& e) { - throw std::runtime_error( - fmt::format("Failed to read preprocessing config from {}: {}", filename.string(), e.what()) - ); - } -} - -std::string YamlFile::configType() const { - return fmt::format("yaml config file '{}'", filename.string()); -} - -bool YamlFile::hasProperty(const Option& option) const { - YAML::Node current = Clone(node); - for (const auto& access : option.access_path) { - if (!current.IsDefined() || 
!current.IsMap()) { - return false; - } - current = current[access]; - } - return current.IsDefined(); -} - -std::optional YamlFile::getString(const Option& option) const { - YAML::Node current = Clone(node); - for (const auto& access : option.access_path) { - if (!current.IsDefined() || !current.IsMap()) { - return std::nullopt; - } - current = current[access]; - } - if (!current.IsDefined() || !current.IsScalar()) { - return std::nullopt; - } - return current.as(); -} diff --git a/src/silo/config/util/yaml_file.test.cpp b/src/silo/config/util/yaml_file.test.cpp index e959109f1..d5a99ce14 100644 --- a/src/silo/config/util/yaml_file.test.cpp +++ b/src/silo/config/util/yaml_file.test.cpp @@ -1,42 +1,127 @@ -#include "silo/config/util/yaml_file.h" +#include "config/backend/yaml_file.h" #include #include +#include "config/config_key_path.h" +#include "silo/common/fmt_formatters.h" + +using silo::config::ConfigKeyPath; using silo::config::YamlFile; -TEST(YamlFile, canCorrectlyCheckForPresentPropertiesCaseSensitively) { - const YamlFile under_test("./testBaseData/test_preprocessing_config.yaml"); +// static std::string configKeyPathToString(const ConfigKeyPath& key_path); + +// static ConfigKeyPath stringToConfigKeyPath(const std::string& key_path_string); + +TEST(YamlFile, simpleStringToConfigKeyPath) { + auto under_test = YamlFile::stringToConfigKeyPath("test"); + ASSERT_EQ(under_test, (ConfigKeyPath::from({{{"test"}}}))); +} + +TEST(YamlFile, stringToConfigKeyPath1) { + auto under_test = YamlFile::stringToConfigKeyPath("api.port"); + ASSERT_EQ(under_test, (ConfigKeyPath::from({{"api"}, {"port"}}))); + ASSERT_NE(under_test, (ConfigKeyPath::from({{"api", "port"}}))); +} + +TEST(YamlFile, stringToConfigKeyPath2) { + auto under_test = YamlFile::stringToConfigKeyPath("query.materializationCutoff"); + ASSERT_EQ(under_test, (ConfigKeyPath::from({{"query"}, {"materialization", "cutoff"}}))); +} + +TEST(YamlFile, configKeyPathToString) { + 
ASSERT_EQ(YamlFile::configKeyPathToString(ConfigKeyPath::from({{"test"}})), "test"); + ASSERT_EQ( + YamlFile::configKeyPathToString( + (ConfigKeyPath::from({{"query"}, {"materialization", "cutoff"}})) + ), + "query.materializationCutoff" + ); +} - ASSERT_EQ(under_test.hasProperty({{"inputDirectory"}}), true); - ASSERT_EQ(under_test.hasProperty({{"INPUTDIRECTORY"}}), false); +TEST(YamlFile, validRoundTrip) { + auto under_test = + std::vector{"test", "somethingElse.that.is.quiteLong", "a.2.3.4", "asd", "aa"}; + for (const auto& string : under_test) { + ASSERT_EQ(YamlFile::configKeyPathToString(YamlFile::stringToConfigKeyPath(string)), string); + } } -TEST(YamlFile, canCorrectlyCheckForNonPresentProperties) { - const YamlFile under_test("./testBaseData/test_preprocessing_config.yaml"); +TEST(YamlFile, resolvesConfigKeyPath1) { + auto under_test = YamlFile::stringToConfigKeyPath("api.port"); + ASSERT_EQ(under_test, (ConfigKeyPath::from({{"api"}, {"port"}}))); + ASSERT_NE(under_test, (ConfigKeyPath::from({{"api", "port"}}))); +} - ASSERT_EQ(under_test.hasProperty({{"a"}}), false); +TEST(YamlFile, resolvesConfigKeyPath2) { + auto under_test = YamlFile::stringToConfigKeyPath("query.materializationCutoff"); + ASSERT_EQ(under_test, (ConfigKeyPath::from({{"query"}, {"materialization", "cutoff"}}))); } -TEST(YamlFile, getStringGetsCorrectField) { - const YamlFile under_test("./testBaseData/test_preprocessing_config.yaml"); +TEST(YamlFile, containsCorrectFieldsFromFlatYAML) { + const auto under_test = + YamlFile::readFile("./testBaseData/test_preprocessing_config.yaml").getYamlFields(); - ASSERT_EQ(under_test.getString({{"inputDirectory"}}), "./testBaseData/exampleDataset/"); + const std::unordered_map expected_result{ + {YamlFile::stringToConfigKeyPath("inputDirectory"), + YAML::Node{"./testBaseData/exampleDataset/"}}, + {YamlFile::stringToConfigKeyPath("outputDirectory"), YAML::Node{"./output/"}}, + {YamlFile::stringToConfigKeyPath("ndjsonInputFilename"), 
YAML::Node{"input_file.ndjson"}}, + {YamlFile::stringToConfigKeyPath("lineageDefinitionsFilename"), + YAML::Node{"lineage_definitions.yaml"}}, + {YamlFile::stringToConfigKeyPath("referenceGenomeFilename"), + YAML::Node{"reference_genomes.json"}}, + }; + + for (const auto& [key, value] : expected_result) { + ASSERT_TRUE(under_test.contains(key)); + ASSERT_EQ(under_test.at(key).as(), value.as()); + } + for (const auto& [key, value] : under_test) { + ASSERT_TRUE(expected_result.contains(key)); + ASSERT_EQ(expected_result.at(key).as(), value.as()); + } } -TEST(YamlFile, getStringGetsCorrectFieldsRepeatedly) { - const YamlFile under_test("./testBaseData/test_preprocessing_config.yaml"); +TEST(YamlFile, containsCorrectFieldsFromNestedYAML) { + const auto under_test = + YamlFile::readFile("./testBaseData/test_runtime_config.yaml").getYamlFields(); + + const std::unordered_map expected_result{ + {YamlFile::stringToConfigKeyPath("dataDirectory"), YAML::Node{"test/directory"}}, + {YamlFile::stringToConfigKeyPath("api.port"), YAML::Node{1234}}, + }; - ASSERT_EQ(under_test.getString({{"inputDirectory"}}), "./testBaseData/exampleDataset/"); - ASSERT_EQ(under_test.getString({{"outputDirectory"}}), "./output/"); - ASSERT_EQ(under_test.getString({{"ndjsonInputFilename"}}), "input_file.ndjson"); - ASSERT_EQ(under_test.getString({{"lineageDefinitionsFilename"}}), "lineage_definitions.yaml"); - ASSERT_EQ(under_test.getString({{"referenceGenomeFilename"}}), "reference_genomes.json"); + for (const auto& [key, value] : expected_result) { + ASSERT_TRUE(under_test.contains(key)); + ASSERT_EQ(under_test.at(key).as(), value.as()); + } + for (const auto& [key, value] : under_test) { + ASSERT_TRUE(expected_result.contains(key)); + ASSERT_EQ(expected_result.at(key).as(), value.as()); + } } -TEST(YamlFile, getStringNulloptOnNotPresent) { - const YamlFile under_test("./testBaseData/test_preprocessing_config.yaml"); +TEST(YamlFile, shouldThrowExceptionWhenConfigFileDoesNotExist) { + EXPECT_THAT( 
+ []() { YamlFile::readFile("testBaseData/does_not_exist.yaml"); }, + ThrowsMessage( + ::testing::HasSubstr("Could not open the YAML file: 'testBaseData/does_not_exist.yaml'") + ) + ); +} - ASSERT_EQ(under_test.getString({{"a", "a"}}), std::nullopt); - ASSERT_EQ(under_test.getString({{"again_not_present"}}), std::nullopt); +TEST(YamlFile, shouldThrowExceptionWhenConfigFileCannotBeParsed) { + EXPECT_THAT( + []() { + YamlFile::fromYAML("string", R"( +X +s: +)"); + }, + ThrowsMessage( + ::testing::HasSubstr("string does not contain valid YAML: yaml-cpp: error at line 3, " + "column 2: illegal map value") + ) + ); } diff --git a/src/silo/database.test.cpp b/src/silo/database.test.cpp index 4ae6bb687..7d4ce1ba7 100644 --- a/src/silo/database.test.cpp +++ b/src/silo/database.test.cpp @@ -4,23 +4,27 @@ #include +#include "config/backend/yaml_file.h" #include "silo/common/nucleotide_symbols.h" #include "silo/config/preprocessing_config.h" #include "silo/config/util/config_repository.h" -#include "silo/config/util/yaml_file.h" #include "silo/database_info.h" #include "silo/preprocessing/preprocessor.h" #include "silo/preprocessing/sql_function.h" #include "silo/query_engine/query_engine.h" #include "silo/storage/reference_genomes.h" +using silo::config::PreprocessingConfig; + namespace { silo::Database buildTestDatabase() { const std::filesystem::path input_directory{"./testBaseData/unitTestDummyDataset/"}; - silo::config::PreprocessingConfig config; - config.overwrite(silo::config::YamlFile(input_directory / "preprocessing_config.yaml")); - + PreprocessingConfig config; + config.overwriteFrom( + silo::config::YamlFile::readFile(input_directory / "preprocessing_config.yaml") + .verify(PreprocessingConfig::getConfigSpecification()) + ); const auto database_config = silo::config::ConfigRepository().getValidatedConfig(input_directory / "database_config.yaml"); @@ -54,8 +58,11 @@ TEST(DatabaseTest, shouldBuildDatabaseWithoutErrors) { TEST(DatabaseTest, 
shouldSuccessfullyBuildDatabaseWithoutPartitionBy) { const std::filesystem::path input_directory{"./testBaseData/"}; - silo::config::PreprocessingConfig config; - config.overwrite(silo::config::YamlFile(input_directory / "test_preprocessing_config.yaml")); + PreprocessingConfig config; + config.overwriteFrom( + silo::config::YamlFile::readFile(input_directory / "test_preprocessing_config.yaml") + .verify(PreprocessingConfig::getConfigSpecification()) + ); const auto database_config = silo::config::ConfigRepository().getValidatedConfig( input_directory / "test_database_config_without_partition_by.yaml" diff --git a/src/silo/preprocessing/lineage_definition_file.cpp b/src/silo/preprocessing/lineage_definition_file.cpp index 958f780d6..8f29e126c 100644 --- a/src/silo/preprocessing/lineage_definition_file.cpp +++ b/src/silo/preprocessing/lineage_definition_file.cpp @@ -107,6 +107,10 @@ LineageDefinitionFile LineageDefinitionFile::fromYAMLFile(const std::filesystem: std::ostringstream contents; contents << file.rdbuf(); + if (contents.fail()) { + // TODO + } + try { return fromYAML(contents.str()); } catch (const YAML::ParserException& parser_exception) { diff --git a/src/silo/preprocessing/preprocessor.cpp b/src/silo/preprocessing/preprocessor.cpp index 0ebf453f9..f39e545fd 100644 --- a/src/silo/preprocessing/preprocessor.cpp +++ b/src/silo/preprocessing/preprocessor.cpp @@ -7,6 +7,7 @@ #include #include "silo/common/block_timer.h" +#include "silo/common/fmt_formatters.h" #include "silo/common/panic.h" #include "silo/common/string_utils.h" #include "silo/common/table_reader.h" @@ -52,7 +53,7 @@ Preprocessor::Preprocessor( reference_genomes(std::move(reference_genomes_)), lineage_tree(std::move(lineage_tree_)), preprocessing_db( - preprocessing_config.getPreprocessingDatabaseLocation(), + preprocessing_config.preprocessing_database_location, reference_genomes, preprocessing_config.getDuckdbMemoryLimitInG() ), @@ -79,13 +80,13 @@ Database Preprocessor::preprocess() { 
SPDLOG_INFO( "preprocessing - creating intermediate results directory '{}'", - preprocessing_config.getIntermediateResultsDirectory().string() + preprocessing_config.intermediate_results_directory ); - std::filesystem::create_directory(preprocessing_config.getIntermediateResultsDirectory()); - if (!std::filesystem::is_directory(preprocessing_config.getIntermediateResultsDirectory())) { + std::filesystem::create_directory(preprocessing_config.intermediate_results_directory); + if (!std::filesystem::is_directory(preprocessing_config.intermediate_results_directory)) { auto error = fmt::format( "Directory for intermediate results could not be created.", - preprocessing_config.getIntermediateResultsDirectory().string() + preprocessing_config.intermediate_results_directory ); SPDLOG_ERROR(error); throw silo::preprocessing::PreprocessingException(error); @@ -113,9 +114,7 @@ Database Preprocessor::preprocess() { SPDLOG_INFO("preprocessing - building database"); preprocessing_db.refreshConnection(); - return buildDatabase( - partition_descriptor, preprocessing_config.getIntermediateResultsDirectory() - ); + return buildDatabase(partition_descriptor, preprocessing_config.intermediate_results_directory); } void Preprocessor::finalizeConfig() { @@ -545,9 +544,8 @@ void Preprocessor::createUnalignedPartitionedSequenceFile( size_t sequence_idx, const std::string& table_sql ) { - const std::filesystem::path save_location = - preprocessing_config.getIntermediateResultsDirectory() / - fmt::format("unaligned_nuc_{}", sequence_idx); + const std::filesystem::path save_location = preprocessing_config.intermediate_results_directory / + fmt::format("unaligned_nuc_{}", sequence_idx); // duckdb OVERWRITE and OVERWRITE_OR_IGNORE is broken in the current version, // therefore we manually delete the save_location directory in case it already exists if (std::filesystem::exists(save_location)) { diff --git a/src/silo/preprocessing/preprocessor.invalid.test.cpp 
b/src/silo/preprocessing/preprocessor.invalid.test.cpp index 6b0111951..7567a54df 100644 --- a/src/silo/preprocessing/preprocessor.invalid.test.cpp +++ b/src/silo/preprocessing/preprocessor.invalid.test.cpp @@ -7,8 +7,8 @@ #include #include +#include "config/backend/yaml_file.h" #include "silo/config/util/config_repository.h" -#include "silo/config/util/yaml_file.h" #include "silo/database.h" #include "silo/database_info.h" #include "silo/preprocessing/preprocessing_exception.h" @@ -101,11 +101,10 @@ TEST_P(InvalidPreprocessorTestFixture, shouldNotProcessData) { const std::filesystem::path input_directory = fmt::format("test{}", millis); std::filesystem::create_directories(input_directory); - const PreprocessingConfig config_with_input_dir{ - .input_directory = input_directory, - .intermediate_results_directory = input_directory, - .ndjson_input_filename = "input.json" - }; + PreprocessingConfig config_with_input_dir; + config_with_input_dir.input_directory = input_directory; + config_with_input_dir.intermediate_results_directory = input_directory; + config_with_input_dir.ndjson_input_filename = "input.json"; config_with_input_dir.validate(); std::ofstream file(config_with_input_dir.getNdjsonInputFilename().value()); diff --git a/src/silo/preprocessing/preprocessor.test.cpp b/src/silo/preprocessing/preprocessor.test.cpp index 9c034484f..5bcd0f98f 100644 --- a/src/silo/preprocessing/preprocessor.test.cpp +++ b/src/silo/preprocessing/preprocessor.test.cpp @@ -3,13 +3,15 @@ #include #include +#include "config/backend/yaml_file.h" #include "silo/config/util/config_repository.h" -#include "silo/config/util/yaml_file.h" #include "silo/database.h" #include "silo/database_info.h" #include "silo/preprocessing/sql_function.h" #include "silo/query_engine/query_engine.h" +using silo::config::PreprocessingConfig; + namespace { struct Scenario { @@ -238,9 +240,13 @@ INSTANTIATE_TEST_SUITE_P( TEST_P(PreprocessorTestFixture, shouldProcessData) { const auto scenario = GetParam(); - 
silo::config::PreprocessingConfig config{.input_directory = scenario.input_directory}; + silo::config::PreprocessingConfig config; + config.input_directory = scenario.input_directory; - config.overwrite(silo::config::YamlFile(scenario.input_directory / "preprocessing_config.yaml")); + config.overwriteFrom( + silo::config::YamlFile::readFile(scenario.input_directory / "preprocessing_config.yaml") + .verify(PreprocessingConfig::getConfigSpecification()) + ); const auto database_config = silo::config::ConfigRepository().getValidatedConfig( scenario.input_directory / "database_config.yaml" diff --git a/src/silo/query_engine/operators/index_scan.test.cpp b/src/silo/query_engine/operators/index_scan.test.cpp index 6ec76a90b..90bc718c6 100644 --- a/src/silo/query_engine/operators/index_scan.test.cpp +++ b/src/silo/query_engine/operators/index_scan.test.cpp @@ -1,5 +1,6 @@ #include "silo/query_engine/operators/index_scan.h" +#include #include #include diff --git a/src/silo_api/api.cpp b/src/silo_api/api.cpp index 8a4d1e485..8446b4630 100644 --- a/src/silo_api/api.cpp +++ b/src/silo_api/api.cpp @@ -1,332 +1,44 @@ -#include -#include -#include -#include -#include +#include "silo_api/api.h" -#include - -#include #include #include #include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include "silo/common/lineage_tree.h" -#include "silo/config/database_config.h" -#include "silo/config/preprocessing_config.h" -#include "silo/config/runtime_config.h" -#include "silo/config/util/abstract_config_source.h" -#include "silo/config/util/config_repository.h" -#include "silo/config/util/yaml_file.h" -#include "silo/preprocessing/preprocessor.h" -#include "silo/preprocessing/sql_function.h" -#include "silo/storage/reference_genomes.h" -#include "silo_api/command_line_arguments.h" #include "silo_api/database_directory_watcher.h" #include "silo_api/database_mutex.h" -#include "silo_api/environment_variables.h" 
-#include "silo_api/logging.h" #include "silo_api/request_handler_factory.h" -namespace { - -const std::string PREPROCESSING_CONFIG_OPTION = "preprocessingConfig"; -const std::string RUNTIME_CONFIG_OPTION = "runtimeConfig"; -const std::string DATABASE_CONFIG_OPTION = "databaseConfig"; -const std::string API_OPTION = "api"; -const std::string PREPROCESSING_OPTION = "preprocessing"; +int SiloServer::runApi(const silo::config::RuntimeConfig& runtime_config) { + SPDLOG_INFO("Starting SILO API"); -using silo::config::YamlFile; -using silo_api::CommandLineArguments; -using silo_api::EnvironmentVariables; + silo_api::DatabaseMutex database_mutex; -silo::config::PreprocessingConfig preprocessingConfig( - const Poco::Util::AbstractConfiguration& config -) { - silo::config::PreprocessingConfig preprocessing_config; - if (std::filesystem::exists("./default_preprocessing_config.yaml")) { - preprocessing_config.overwrite(YamlFile("./default_preprocessing_config.yaml")); - } + const Poco::Net::ServerSocket server_socket(runtime_config.api_options.port); - if (config.hasProperty(PREPROCESSING_CONFIG_OPTION)) { - preprocessing_config.overwrite(YamlFile(config.getString(PREPROCESSING_CONFIG_OPTION))); - } else if (std::filesystem::exists("./preprocessing_config.yaml")) { - preprocessing_config.overwrite(YamlFile("./preprocessing_config.yaml")); - } + const silo_api::DatabaseDirectoryWatcher watcher(runtime_config.data_directory, database_mutex); - preprocessing_config.overwrite(EnvironmentVariables()); - preprocessing_config.overwrite(CommandLineArguments(config)); - preprocessing_config.validate(); + auto* const poco_parameter = new Poco::Net::HTTPServerParams; - SPDLOG_INFO("Resulting preprocessing config: {}", preprocessing_config); - return preprocessing_config; -} + SPDLOG_INFO("Using {} queued http connections", runtime_config.api_options.max_connections); + poco_parameter->setMaxQueued(runtime_config.api_options.max_connections); -silo::config::DatabaseConfig 
databaseConfig(const Poco::Util::AbstractConfiguration& config) { - if (config.hasProperty(DATABASE_CONFIG_OPTION)) { - return silo::config::ConfigRepository().getValidatedConfig( - config.getString(DATABASE_CONFIG_OPTION) - ); - } - SPDLOG_DEBUG("databaseConfig not found in config file. Using default value: databaseConfig.yaml" + SPDLOG_INFO( + "Using {} threads for http connections", runtime_config.api_options.parallel_threads ); - return silo::config::ConfigRepository().getValidatedConfig("database_config.yaml"); -} - -class SiloServer : public Poco::Util::ServerApplication { - protected: - [[maybe_unused]] void defineOptions(Poco::Util::OptionSet& options) override { - ServerApplication::defineOptions(options); - - options.addOption( - Poco::Util::Option() - .fullName("help") - .shortName("h") - .description("display help information on command line arguments") - .required(false) - .repeatable(false) - .callback(Poco::Util::OptionCallback(this, &SiloServer::displayHelp)) - ); - - options.addOption(Poco::Util::Option() - .fullName(PREPROCESSING_CONFIG_OPTION) - .description("path to the preprocessing config file") - .required(false) - .repeatable(false) - .argument("PATH") - .binding(PREPROCESSING_CONFIG_OPTION)); - - options.addOption(Poco::Util::Option() - .fullName(DATABASE_CONFIG_OPTION) - .description("path to the database config file") - .required(false) - .repeatable(false) - .argument("PATH") - .binding(DATABASE_CONFIG_OPTION)); - - options.addOption(Poco::Util::Option() - .fullName(silo::config::DATA_DIRECTORY_OPTION.toCamelCase()) - .shortName("d") - .description("path to the preprocessed data") - .required(false) - .repeatable(false) - .argument("PATH") - .binding(silo::config::DATA_DIRECTORY_OPTION.toCamelCase())); - - options.addOption(Poco::Util::Option() - .fullName(silo::config::PORT_OPTION.toCamelCase()) - .description("port to listen to requests") - .required(false) - .repeatable(false) - .argument("NUMBER") - 
.binding(silo::config::PORT_OPTION.toCamelCase())); - - options.addOption(Poco::Util::Option() - .fullName(silo::config::MAX_CONNECTIONS_OPTION.toCamelCase()) - .description("maximum number of http connections") - .required(false) - .repeatable(false) - .argument("NUMBER") - .binding(silo::config::MAX_CONNECTIONS_OPTION.toCamelCase())); - - options.addOption(Poco::Util::Option() - .fullName(silo::config::PARALLEL_THREADS_OPTION.toCamelCase()) - .description("number of threads for http connections") - .required(false) - .repeatable(false) - .argument("NUMBER") - .binding(silo::config::PARALLEL_THREADS_OPTION.toCamelCase())); - - options.addOption(Poco::Util::Option() - .fullName(API_OPTION) - .shortName("a") - .description("Execution mode: start the SILO web interface") - .required(false) - .repeatable(false) - .binding(API_OPTION) - .group("executionMode")); - - options.addOption( - Poco::Util::Option() - .fullName(PREPROCESSING_OPTION) - .shortName("p") - .description("Execution mode: trigger the preprocessing pipeline to generate a " - "partitioned dataset that can be read by the database") - .required(false) - .repeatable(false) - .binding(PREPROCESSING_OPTION) - .group("executionMode") - ); - - options.addOption( - Poco::Util::Option( - silo::config::ESTIMATED_STARTUP_TIME_IN_MINUTES_OPTION.toCamelCase(), - "t", - "Estimated time in minutes that the initial loading of the database takes. " - "As long as no database is loaded yet, SILO will throw a 503 error. " - "This option allows SILO to compute a Retry-After header for the 503 response. 
", - false - ) - .required(false) - .repeatable(false) - .argument("MINUTES", true) - .binding(silo::config::ESTIMATED_STARTUP_TIME_IN_MINUTES_OPTION.toCamelCase()) - ); - } - - int main(const std::vector& args) override { - if (!args.empty()) { - std::cout << "Unknown arguments provided: " << boost::algorithm::join(args, ", ") - << "\n\n"; - displayHelp("", ""); - return Application::EXIT_USAGE; - } - - if (config().hasProperty(API_OPTION)) { - return handleApi(); - } - - if (config().hasProperty(PREPROCESSING_OPTION)) { - return handlePreprocessing(); - } - - std::cout << "No execution mode specified.\n\n"; - displayHelp("", ""); - return Application::EXIT_USAGE; - } - - private: - int handleApi() { - SPDLOG_INFO("Starting SILO API"); - silo::config::RuntimeConfig runtime_config; - if (config().hasProperty(RUNTIME_CONFIG_OPTION)) { - runtime_config.overwrite(YamlFile(config().getString(RUNTIME_CONFIG_OPTION))); - } else if (std::filesystem::exists("./runtime_config.yaml")) { - runtime_config.overwrite(YamlFile("./runtime_config.yaml")); - } - runtime_config.overwrite(EnvironmentVariables()); - runtime_config.overwrite(CommandLineArguments(config())); - - silo_api::DatabaseMutex database_mutex; + poco_parameter->setMaxThreads(runtime_config.api_options.parallel_threads); - const Poco::Net::ServerSocket server_socket(runtime_config.api_options.port); - - const silo_api::DatabaseDirectoryWatcher watcher( - runtime_config.api_options.data_directory, database_mutex - ); - - auto* const poco_parameter = new Poco::Net::HTTPServerParams; - - SPDLOG_INFO("Using {} queued http connections", runtime_config.api_options.max_connections); - poco_parameter->setMaxQueued(runtime_config.api_options.max_connections); - - SPDLOG_INFO( - "Using {} threads for http connections", runtime_config.api_options.parallel_threads - ); - poco_parameter->setMaxThreads(runtime_config.api_options.parallel_threads); - - Poco::Net::HTTPServer server( - new 
silo_api::SiloRequestHandlerFactory(database_mutex, runtime_config), - server_socket, - poco_parameter - ); - - SPDLOG_INFO("Listening on port {}", runtime_config.api_options.port); - - server.start(); - waitForTerminationRequest(); - server.stop(); - - return Application::EXIT_OK; - } - - silo::Database runPreprocessor(const silo::config::PreprocessingConfig& preprocessing_config) { - auto database_config = databaseConfig(config()); - - SPDLOG_INFO("preprocessing - reading reference genome"); - const auto reference_genomes = - silo::ReferenceGenomes::readFromFile(preprocessing_config.getReferenceGenomeFilename()); - - silo::common::LineageTreeAndIdMap lineage_definitions; - if (auto lineage_file_name = preprocessing_config.getLineageDefinitionsFilename()) { - SPDLOG_INFO( - "preprocessing - read and verify the lineage tree '{}'", - lineage_file_name.value().string() - ); - lineage_definitions = silo::common::LineageTreeAndIdMap::fromLineageDefinitionFilePath( - lineage_file_name.value() - ); - } - - auto preprocessor = silo::preprocessing::Preprocessor( - preprocessing_config, database_config, reference_genomes, std::move(lineage_definitions) - ); - - return preprocessor.preprocess(); - } - - int handlePreprocessing() { - SPDLOG_INFO("Starting SILO preprocessing"); - try { - const auto preprocessing_config = preprocessingConfig(config()); - - auto database = runPreprocessor(preprocessing_config); - - database.saveDatabaseState(preprocessing_config.getOutputDirectory()); - } catch (const std::exception& ex) { - SPDLOG_ERROR(ex.what()); - throw ex; - } catch (const std::string& ex) { - SPDLOG_ERROR(ex); - return 1; - } catch (...) { - SPDLOG_ERROR("Preprocessing cancelled with uncatchable (...) 
exception"); - const auto exception = std::current_exception(); - if (exception) { - const auto* message = abi::__cxa_current_exception_type()->name(); - SPDLOG_ERROR("current_exception: {}", message); - } - return 1; - } - return Application::EXIT_OK; - } - - void displayHelp( - const std::string& /*name*/, - const std::string& /*value*/ - ) { - Poco::Util::HelpFormatter help_formatter(options()); - help_formatter.setCommand(commandName()); - help_formatter.setUsage("OPTIONS"); - help_formatter.setHeader("SILO - Sequence Indexing engine for Large Order of genomic data"); - help_formatter.format(std::cout); - } -}; - -} // namespace - -int main(int argc, char** argv) { - setupLogger(); - - SPDLOG_INFO("Starting SILO"); + Poco::Net::HTTPServer server( + new silo_api::SiloRequestHandlerFactory(database_mutex, runtime_config), + server_socket, + poco_parameter + ); - SiloServer app; - const auto return_code = app.run(argc, argv); + SPDLOG_INFO("Listening on port {}", runtime_config.api_options.port); - SPDLOG_INFO("Stopping SILO"); - spdlog::default_logger()->flush(); + server.start(); + waitForTerminationRequest(); + server.stop(); - return return_code; + return Application::EXIT_OK; } diff --git a/src/silo_api/command_line_arguments.cpp b/src/silo_api/command_line_arguments.cpp deleted file mode 100644 index f16cac34f..000000000 --- a/src/silo_api/command_line_arguments.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include "silo_api/command_line_arguments.h" - -#include -#include -#include - -#include "silo/config/util/config_exception.h" - -namespace silo_api { - -std::string CommandLineArguments::asUnixOptionString( - const silo::config::AbstractConfigSource::Option& option -) { - std::vector result; - for (const std::string& current_string : option.access_path) { - std::string current_result; - for (const char character : current_string) { - if (std::isupper(character)) { - current_result += '-'; - const char char_in_lower_case = - 
static_cast(std::tolower(static_cast(character))); - current_result += char_in_lower_case; - } else { - current_result += character; - } - } - result.emplace_back(current_result); - } - return boost::join(result, "-"); -} - -CommandLineArguments::CommandLineArguments(const Poco::Util::AbstractConfiguration& config) - : config(config) {} - -std::string CommandLineArguments::configType() const { - return "command line argument"; -} - -bool CommandLineArguments::hasProperty(const Option& option) const { - // TODO(#444) return config.hasProperty(asUnixOptionString(option)); - return config.hasProperty(option.toCamelCase()); -} - -std::optional CommandLineArguments::getString(const Option& option) const { - if (hasProperty(option)) { - // TODO(#444) return config.getString(asUnixOptionString(option)); - return config.getString(option.toCamelCase()); - } - return std::nullopt; -} - -} // namespace silo_api diff --git a/src/silo_api/command_line_arguments.test.cpp b/src/silo_api/command_line_arguments.test.cpp deleted file mode 100644 index 08e08df76..000000000 --- a/src/silo_api/command_line_arguments.test.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include "silo_api/command_line_arguments.h" - -#include - -TEST(CommandLineArguments, correctUnixOptionString) { - ASSERT_EQ(silo_api::CommandLineArguments::asUnixOptionString({{""}}), ""); - ASSERT_EQ(silo_api::CommandLineArguments::asUnixOptionString({{"A"}}), "-a"); - ASSERT_EQ(silo_api::CommandLineArguments::asUnixOptionString({{"abc"}}), "abc"); - ASSERT_EQ( - silo_api::CommandLineArguments::asUnixOptionString({{"someCamelCase"}}), "some-camel-case" - ); - ASSERT_EQ( - silo_api::CommandLineArguments::asUnixOptionString({{"BADCamelCase"}}), "-b-a-d-camel-case" - ); - ASSERT_EQ( - silo_api::CommandLineArguments::asUnixOptionString({{"something_with_underscores"}}), - "something_with_underscores" - ); - ASSERT_EQ( - silo_api::CommandLineArguments::asUnixOptionString({{"some", "subsectionedSequence"}}), - 
"some-subsectioned-sequence" - ); - ASSERT_EQ( - silo_api::CommandLineArguments::asUnixOptionString({{"some", "more", "sections"}}), - "some-more-sections" - ); -} \ No newline at end of file diff --git a/src/silo_api/environment_variables.cpp b/src/silo_api/environment_variables.cpp deleted file mode 100644 index 503573d3c..000000000 --- a/src/silo_api/environment_variables.cpp +++ /dev/null @@ -1,44 +0,0 @@ -#include "silo_api/environment_variables.h" - -#include -#include -#include -#include - -namespace silo_api { - -std::string EnvironmentVariables::prefixedUppercase(const Option& option) { - std::vector result; - for (const std::string& current_string : option.access_path) { - std::string current_result; - for (const char character : current_string) { - if (std::isupper(character)) { - current_result += '_'; - current_result += character; - } else { - const char char_in_upper_case = - static_cast(std::toupper(static_cast(character))); - current_result += char_in_upper_case; - } - } - result.emplace_back(current_result); - } - return "SILO_" + boost::join(result, "_"); -} - -std::string EnvironmentVariables::configType() const { - return "environment variable"; -} - -bool EnvironmentVariables::hasProperty(const Option& option) const { - return Poco::Environment::has(prefixedUppercase(option)); -} - -std::optional EnvironmentVariables::getString(const Option& option) const { - if (hasProperty(option)) { - return Poco::Environment::get(prefixedUppercase(option)); - } - return std::nullopt; -} - -} // namespace silo_api diff --git a/src/silo_api/environment_variables.test.cpp b/src/silo_api/environment_variables.test.cpp deleted file mode 100644 index bcce98499..000000000 --- a/src/silo_api/environment_variables.test.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include "silo_api/environment_variables.h" - -#include -#include - -TEST(EnvironmentVariables, correctPrefixedUppercase) { - ASSERT_EQ(silo_api::EnvironmentVariables::prefixedUppercase({{""}}), "SILO_"); - 
ASSERT_EQ(silo_api::EnvironmentVariables::prefixedUppercase({{"A"}}), "SILO__A"); - ASSERT_EQ(silo_api::EnvironmentVariables::prefixedUppercase({{"abc"}}), "SILO_ABC"); - ASSERT_EQ( - silo_api::EnvironmentVariables::prefixedUppercase({{"someCamelCase"}}), "SILO_SOME_CAMEL_CASE" - ); - ASSERT_EQ( - silo_api::EnvironmentVariables::prefixedUppercase({{"BADCamelCase"}}), - "SILO__B_A_D_CAMEL_CASE" - ); - ASSERT_EQ( - silo_api::EnvironmentVariables::prefixedUppercase({{"something_with_underscores"}}), - "SILO_SOMETHING_WITH_UNDERSCORES" - ); - ASSERT_EQ( - silo_api::EnvironmentVariables::prefixedUppercase({{"something_with_underscores"}}), - "SILO_SOMETHING_WITH_UNDERSCORES" - ); - ASSERT_EQ( - silo_api::EnvironmentVariables::prefixedUppercase({{"some", "subsectionedSequence"}}), - "SILO_SOME_SUBSECTIONED_SEQUENCE" - ); - ASSERT_EQ( - silo_api::EnvironmentVariables::prefixedUppercase({{"some", "more", "sections"}}), - "SILO_SOME_MORE_SECTIONS" - ); -} \ No newline at end of file diff --git a/src/silo_api/error_request_handler.test.cpp b/src/silo_api/error_request_handler.test.cpp index 2685b49ef..b778bbf88 100644 --- a/src/silo_api/error_request_handler.test.cpp +++ b/src/silo_api/error_request_handler.test.cpp @@ -17,9 +17,11 @@ class MockRequestHandler : public Poco::Net::HTTPRequestHandler { ); }; -const silo::config::RuntimeConfig TEST_RUNTIME_CONFIG = { - .api_options{.estimated_startup_end = std::chrono::system_clock::now()} -}; +const auto TEST_RUNTIME_CONFIG = [] { + silo::config::RuntimeConfig config; + config.api_options.estimated_startup_end = std::chrono::system_clock::now(); + return config; +}(); } // namespace diff --git a/src/silo_api/request_handler_factory.test.cpp b/src/silo_api/request_handler_factory.test.cpp index 99d2622b4..baf8726c3 100644 --- a/src/silo_api/request_handler_factory.test.cpp +++ b/src/silo_api/request_handler_factory.test.cpp @@ -59,7 +59,9 @@ silo::config::RuntimeConfig getRuntimeConfigThatEndsInXMinutes( std::chrono::minutes 
estimated_time_in_minutes ) { const std::chrono::time_point point = std::chrono::system_clock::now(); - return {.api_options = {.estimated_startup_end = point + estimated_time_in_minutes}}; + silo::config::RuntimeConfig result; + result.api_options.estimated_startup_end = point + estimated_time_in_minutes; + return result; } const int FOUR_MINUTES_IN_SECONDS = 240; diff --git a/testBaseData/emptyInputNdjson/preprocessing_config.yaml b/testBaseData/emptyInputNdjson/preprocessing_config.yaml index 21b68524c..d9228f3ac 100644 --- a/testBaseData/emptyInputNdjson/preprocessing_config.yaml +++ b/testBaseData/emptyInputNdjson/preprocessing_config.yaml @@ -1,3 +1,3 @@ ndjsonInputFilename: "input_file.ndjson" -pangoLineageDefinitionFilename: "lineage_definitions.yaml" +lineageDefinitionsFilename: "lineage_definitions.yaml" referenceGenomeFilename: "reference_genomes.json" diff --git a/testBaseData/test_runtime_config.yaml b/testBaseData/test_runtime_config.yaml index d3209a460..3614cd790 100644 --- a/testBaseData/test_runtime_config.yaml +++ b/testBaseData/test_runtime_config.yaml @@ -1 +1,3 @@ -dataDirectory: test/directory \ No newline at end of file +dataDirectory: "test/directory" +api: + port: 1234 \ No newline at end of file