Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
Taepper committed Nov 20, 2024
1 parent cd7cf31 commit cb6c782
Show file tree
Hide file tree
Showing 14 changed files with 99 additions and 71 deletions.
6 changes: 3 additions & 3 deletions include/config/config_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ class ConfigSpecification {

std::string helpText() const;

std::optional<ConfigValueSpecification> getValueSpecification(const ConfigKeyPath& key
) const;
std::optional<ConfigValueSpecification> getValueSpecification(const ConfigKeyPath& key) const;

std::optional<ConfigValueSpecification> getValueSpecificationStrict(const ConfigKeyPath& key
std::optional<ConfigValueSpecification> getValueSpecificationFromAmbiguousKey(
const AmbiguousConfigKeyPath& key
) const;
};

Expand Down
14 changes: 0 additions & 14 deletions include/config/config_source_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,20 +92,6 @@ struct ParseError {
/// dictionaries in yaml config files, joined with '-' for command
/// line arguments and '_' for environment variables. `ConfigSource`
/// provides the means to do this type-specific conversion
class ConfigSource {
public:
/// A human-readable description including type (command line,
/// config file, env var) and if applicable path to the file.
// used to be called `configType`
[[nodiscard]] virtual std::string configContext() const = 0;

/// Convert a config key path to a string for this kind of config
/// source.
[[nodiscard]] virtual std::string configKeyPathToString(const ConfigKeyPath& config_key_path
) const = 0;

virtual ~ConfigSource() = default;
};

class VerifiedConfigSource;

Expand Down
15 changes: 12 additions & 3 deletions include/config/config_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ class ConfigKeyPath {
[[nodiscard]] std::string toDebugString() const;
};

/// Like ConfigKeyPath, but for input where it is impossible to decide whether
/// the key was meant to refer to `api.port` or `apiPort`. This is the case for
/// CLI arguments (`--api-port`) and environment variables (`SILO_API_PORT`),
/// because they use a single separator character for everything.
class AmbiguousConfigKeyPath {
public:
/// The key segments in input order, as one flat list without any grouping
/// into nesting levels.
std::vector<std::string> path;
};

enum class ConfigValueType { STRING, PATH, INT32, UINT32, UINT16, BOOL };

constexpr std::string_view configValueTypeToString(ConfigValueType type) {
Expand Down Expand Up @@ -136,20 +144,21 @@ class ConfigValueSpecification {
ConfigKeyPath key,
ConfigValueType value_type,
std::string_view help_text
){
) {
ConfigValueSpecification value_specification;
value_specification.key = key;
value_specification.type = value_type;
value_specification.help_text = help_text;
return value_specification;
}

/// No need for the value_type. It is implicitly defined by the default. Prevents misspecification.
/// No need for the value_type. It is implicitly defined by the default. Prevents
/// misspecification.
static ConfigValueSpecification createWithDefault(
ConfigKeyPath key,
ConfigValue default_value,
std::string_view help_text
){
) {
ConfigValueSpecification value_specification;
value_specification.key = key;
value_specification.type = default_value.getValueType();
Expand Down
2 changes: 1 addition & 1 deletion include/config/source/command_line_arguments.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class CommandLineArguments : public RawConfigSource {

static std::string configKeyPathToString(const ConfigKeyPath& key_path);

static ConfigKeyPath stringToConfigKeyPath(const std::string& key_path_string);
static AmbiguousConfigKeyPath stringToConfigKeyPath(const std::string& key_path_string);
};

} // namespace silo::config
2 changes: 1 addition & 1 deletion include/config/source/environment_variables.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class EnvironmentVariables : public RawConfigSource {

static std::string configKeyPathToString(const ConfigKeyPath& key_path);

static ConfigKeyPath stringToConfigKeyPath(const std::string& key_path_string); // TODO check whether AmbiguousKeyPath can be used
static AmbiguousConfigKeyPath stringToConfigKeyPath(const std::string& key_path_string);
};

} // namespace silo::config
26 changes: 17 additions & 9 deletions include/silo/config/preprocessing_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,30 @@
#include <Poco/Util/OptionSet.h>
#include <fmt/format.h>

#include "config/toplevel_interface.h"
#include "config/source/yaml_file.h"
#include "config/toplevel_interface.h"
#include "silo/config/config_defaults.h"

namespace silo::config {

const ConfigKeyPath HELP_OPTION_KEY = YamlFile::stringToConfigKeyPath("help");
const ConfigKeyPath INPUT_DIRECTORY_OPTION_KEY = YamlFile::stringToConfigKeyPath("inputDirectory");
const ConfigKeyPath OUTPUT_DIRECTORY_OPTION_KEY = YamlFile::stringToConfigKeyPath("outputDirectory");
const ConfigKeyPath INTERMEDIATE_RESULTS_DIRECTORY_OPTION_KEY = YamlFile::stringToConfigKeyPath("intermediateResultsDirectory");
const ConfigKeyPath PREPROCESSING_DATABASE_LOCATION_OPTION_KEY = YamlFile::stringToConfigKeyPath("preprocessingDatabaseLocation");
const ConfigKeyPath DUCKDB_MEMORY_LIMIT_IN_G_OPTION_KEY = YamlFile::stringToConfigKeyPath("duckdbMemoryLimitInG");
const ConfigKeyPath LINEAGE_DEFINITIONS_FILE_OPTION_KEY = YamlFile::stringToConfigKeyPath("lineageDefinitionsFilename");
const ConfigKeyPath NDJSON_INPUT_FILENAME_OPTION_KEY = YamlFile::stringToConfigKeyPath("ndjsonInputFilename");
const ConfigKeyPath DATABASE_CONFIG_FILE_OPTION_KEY = YamlFile::stringToConfigKeyPath("databaseConfigFile");
const ConfigKeyPath REFERENCE_GENOMES_FILENAME_OPTION_KEY = YamlFile::stringToConfigKeyPath("referenceGenomeFilename");
const ConfigKeyPath OUTPUT_DIRECTORY_OPTION_KEY =
YamlFile::stringToConfigKeyPath("outputDirectory");
const ConfigKeyPath INTERMEDIATE_RESULTS_DIRECTORY_OPTION_KEY =
YamlFile::stringToConfigKeyPath("intermediateResultsDirectory");
const ConfigKeyPath PREPROCESSING_DATABASE_LOCATION_OPTION_KEY =
YamlFile::stringToConfigKeyPath("preprocessingDatabaseLocation");
const ConfigKeyPath DUCKDB_MEMORY_LIMIT_IN_G_OPTION_KEY =
YamlFile::stringToConfigKeyPath("duckdbMemoryLimitInG");
const ConfigKeyPath LINEAGE_DEFINITIONS_FILE_OPTION_KEY =
YamlFile::stringToConfigKeyPath("lineageDefinitionsFilename");
const ConfigKeyPath NDJSON_INPUT_FILENAME_OPTION_KEY =
YamlFile::stringToConfigKeyPath("ndjsonInputFilename");
const ConfigKeyPath DATABASE_CONFIG_FILE_OPTION_KEY =
YamlFile::stringToConfigKeyPath("databaseConfigFile");
const ConfigKeyPath REFERENCE_GENOMES_FILENAME_OPTION_KEY =
YamlFile::stringToConfigKeyPath("referenceGenomeFilename");

// Specification of the fields in inputs to the PreprocessingConfig struct
const ConfigSpecification PREPROCESSING_CONFIG_SPECIFICATION{
Expand Down
17 changes: 10 additions & 7 deletions src/config/config_metadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,22 @@ std::string indent(std::string_view indentation, const std::string& str) {

namespace silo::config {

std::optional<ConfigValueSpecification> ConfigSpecification::getValueSpecification(const silo::config::ConfigKeyPath& key) const {
/// Looks up the specification of the field that an ambiguous key refers to.
///
/// An AmbiguousConfigKeyPath is a flat list of segments, whereas the key of a
/// field groups its segments into sublevels. A field matches when the segments
/// of all of its sublevels, concatenated in order, are equal to `key.path`.
///
/// @param key the flat key, e.g. as produced from a command line argument or
///    an environment variable
/// @return a copy of the first matching field specification, or std::nullopt
///    if no field matches
std::optional<ConfigValueSpecification> ConfigSpecification::getValueSpecificationFromAmbiguousKey(
   const silo::config::AmbiguousConfigKeyPath& key
) const {
   // NOTE(review): segments are compared exactly (case-sensitively). Confirm that the
   // producers of AmbiguousConfigKeyPath normalise case the same way the field keys do.
   const auto matching_field =
      std::find_if(fields.begin(), fields.end(), [&](const ConfigValueSpecification& field) {
         std::vector<std::string> flattened_segments;
         for (const auto& sublevel : field.key.path) {
            for (const std::string& segment : sublevel) {
               flattened_segments.push_back(segment);
            }
         }
         return flattened_segments == key.path;
      });
   if (matching_field == fields.end()) {
      // Previously the function had an empty body and flowed off its end, which is
      // undefined behaviour for a function with a non-void return type.
      return std::nullopt;
   }
   return *matching_field;
}

}

std::optional<ConfigValueSpecification> ConfigSpecification::getValueSpecificationStrict(const silo::config::ConfigKeyPath& key) const {
auto it = std::find_if(fields.begin(), fields.end(), [&](const ConfigValueSpecification& x){return x.key.path == key.path;});
if(it == fields.end()){
/// Finds the field specification whose key path is exactly equal to `key`.
///
/// @param key the unambiguous key path to look up
/// @return a copy of the first field whose key path equals `key.path`, or
///    std::nullopt if there is none
std::optional<ConfigValueSpecification> ConfigSpecification::getValueSpecification(
   const silo::config::ConfigKeyPath& key
) const {
   for (const ConfigValueSpecification& candidate : fields) {
      if (candidate.key.path == key.path) {
         return candidate;
      }
   }
   return std::nullopt;
}


std::string ConfigSpecification::helpText() const {
std::string program_name = "TODOXX"; // TODO
std::ostringstream help_text;
Expand Down
18 changes: 10 additions & 8 deletions src/config/source/command_line_arguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,32 +12,34 @@

namespace silo::config {

std::string asUnixOptionString(const ConfigKeyPath& config_key_path) {
std::string CommandLineArguments::configKeyPathToString(const ConfigKeyPath& key_path) {
std::vector<std::string> result{"-"};
for (const auto& sublevel : config_key_path.path) {
for (const auto& sublevel : key_path.path) {
for (const std::string& current_string : sublevel) {
result.push_back(current_string);
}
}
return boost::join(result, "-");
}

ConfigKeyPath asConfigKeyPath(const std::string& command_line_argument) {
AmbiguousConfigKeyPath CommandLineArguments::stringToConfigKeyPath(
const std::string& command_line_argument
) {
if (command_line_argument.empty() || command_line_argument[0] != '-') {
throw std::invalid_argument("Invalid Unix option string");
}

ConfigKeyPath config_key_path;
AmbiguousConfigKeyPath config_key_path;
// Remove the leading dash(es) and split by '-'
std::string trimmed = command_line_argument.substr(1); // Skip the first '-'
std::string trimmed = command_line_argument.substr(1); // Skip the first '-'
std::vector<std::string> tokens;

boost::split(tokens, trimmed, boost::is_any_of("-"));

// Here, for simplicity, treat each token as its own sublevel
for (const auto& token : tokens) {
if (!token.empty()) {
config_key_path.path.push_back({token});
config_key_path.path.push_back(token);
}
}

Expand All @@ -63,8 +65,8 @@ VerifiedConfigSource CommandLineArguments::verify(const ConfigSpecification& con
}
break;
}
const ConfigKeyPath ambiguous_key = asConfigKeyPath(arg);
if (auto value_specification_opt = config_struct.getValueSpecification(ambiguous_key)) {
const AmbiguousConfigKeyPath ambiguous_key = stringToConfigKeyPath(arg);
if (auto value_specification_opt = config_struct.getValueSpecificationFromAmbiguousKey(ambiguous_key)) {
auto value_specification = value_specification_opt.value();
std::string value_string;
if (value_specification.type == ConfigValueType::BOOL) {
Expand Down
13 changes: 8 additions & 5 deletions src/config/source/environment_variables.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@ using silo::config::ConfigKeyPath;
std::string prefixedUppercase(const ConfigKeyPath& option) {
std::vector<std::string> result;
for (const auto& sublevel : option.path) {
for(const std::string& current_string : sublevel){
for (const std::string& current_string : sublevel) {
std::string current_string_all_uppercase;
std::transform(current_string.begin(), current_string.end(),
std::back_inserter(current_string_all_uppercase),
[](unsigned char c) { return std::toupper(c); });
std::transform(
current_string.begin(),
current_string.end(),
std::back_inserter(current_string_all_uppercase),
[](unsigned char c) { return std::toupper(c); }
);
result.push_back(current_string_all_uppercase);
}
}
Expand Down Expand Up @@ -60,7 +63,7 @@ EnvironmentVariables EnvironmentVariables::decodeEnvironmentVariables(const char
std::vector<std::string> invalid_config_keys;
for (const auto& [key_string, value_string] : alist) {
auto ambiguous_key = EnvironmentVariables::stringToConfigKeyPath(key_string);
auto value_specification_opt = config_specification.getValueSpecification(ambiguous_key);
auto value_specification_opt = config_specification.getValueSpecificationFromAmbiguousKey(ambiguous_key);
if (value_specification_opt.has_value()) {
auto value_specification = value_specification_opt.value();
ConfigValue value = value_specification.getValueFromString(value_string);
Expand Down
15 changes: 10 additions & 5 deletions src/config/source/yaml_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ using silo::config::ConfigKeyPath;
namespace {

// Only valid if `isProperSingularValue(node) == true`.
std::string stringFromYaml(const YAML::Node& node) {
// Converts `node` to a std::string via `YAML::Node::as<std::string>()`.
// TODO: check whether this helper is still used anywhere; remove it if not.
std::string stringFromYaml(const YAML::Node& node) {
return node.as<std::string>();
}

Expand All @@ -32,7 +32,7 @@ bool isProperSingularValue(const YAML::Node& node) {
return true;
}

std::vector<std::string> splitCamelCase(const std::string& camelCaseString){
std::vector<std::string> splitCamelCase(const std::string& camelCaseString) {
std::vector<std::string> result;
std::string current;

Expand Down Expand Up @@ -110,7 +110,7 @@ namespace silo::config {

std::string YamlFile::configKeyPathToString(const ConfigKeyPath& config_key_path) {
std::vector<std::string> camelCaseStrings;
for(const auto& list : config_key_path.path){
for (const auto& list : config_key_path.path) {
camelCaseStrings.emplace_back(joinCamelCase(list));
}
return boost::join(camelCaseStrings, ".");
Expand All @@ -123,7 +123,12 @@ YamlFile YamlFile::readFile(const std::filesystem::path& path) {
auto node = YAML::LoadFile(path_string);
// Collect all paths present
std::unordered_map<ConfigKeyPath, YAML::Node> paths;
yamlToPaths(fmt::format("YAML file '{}'", path_string), node, ConsList<std::vector<std::string>>{}, paths);
yamlToPaths(
fmt::format("YAML file '{}'", path_string),
node,
ConsList<std::vector<std::string>>{},
paths
);

return YamlFile{path, paths};
} catch (const YAML::Exception& e) {
Expand Down Expand Up @@ -167,7 +172,7 @@ VerifiedConfigSource YamlFile::verify(const ConfigSpecification& config_specific
std::vector<std::string> invalid_config_keys;
std::unordered_map<ConfigKeyPath, ConfigValue> provided_config_values;
for (const auto& [key, yaml] : getYamlFields()) {
auto value_specification = config_specification.getValueSpecificationStrict(key);
auto value_specification = config_specification.getValueSpecification(key);
if (!value_specification.has_value()) {
invalid_config_keys.push_back(configKeyPathToString(key));
} else {
Expand Down
3 changes: 1 addition & 2 deletions src/silo/config/preprocessing_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,8 @@ void PreprocessingConfig::overwriteFrom(const VerifiedConfigSource& config_sourc
fmt::format_to(ctx.out(), "{{\n");
const char* perhaps_comma = " ";


// TODO
(void) perhaps_comma;
(void)perhaps_comma;

return fmt::format_to(ctx.out(), "}}\n");
}
14 changes: 9 additions & 5 deletions src/silo/config/runtime_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,23 @@

#include <spdlog/spdlog.h>

#include "silo/common/fmt_formatters.h"
#include "config/source/yaml_file.h"
#include "silo/common/fmt_formatters.h"

namespace silo::config {

const ConfigKeyPath HELP_OPTION_KEY = YamlFile::stringToConfigKeyPath("help");
const ConfigKeyPath RUNTIME_CONFIG_OPTION_KEY = YamlFile::stringToConfigKeyPath("runtimeConfig");
const ConfigKeyPath DATA_DIRECTORY_OPTION_KEY = YamlFile::stringToConfigKeyPath("dataDirectory");
const ConfigKeyPath API_PORT_OPTION_KEY = YamlFile::stringToConfigKeyPath("api.port");
const ConfigKeyPath API_MAX_CONNECTIONS_OPTION_KEY = YamlFile::stringToConfigKeyPath("api.maxQueuedHttpConnections");
const ConfigKeyPath API_PARALLEL_THREADS_OPTION_KEY = YamlFile::stringToConfigKeyPath("api.threadsForHttpConnections");
const ConfigKeyPath API_ESTIMATED_STARTUP_TIME_OPTION_KEY = YamlFile::stringToConfigKeyPath("api.estimatedStartupTimeInMinutes");
const ConfigKeyPath QUERY_MATERIALIZATION_CUTOFF_OPTION_KEY = YamlFile::stringToConfigKeyPath("query.materializationCutoff");
const ConfigKeyPath API_MAX_CONNECTIONS_OPTION_KEY =
YamlFile::stringToConfigKeyPath("api.maxQueuedHttpConnections");
const ConfigKeyPath API_PARALLEL_THREADS_OPTION_KEY =
YamlFile::stringToConfigKeyPath("api.threadsForHttpConnections");
const ConfigKeyPath API_ESTIMATED_STARTUP_TIME_OPTION_KEY =
YamlFile::stringToConfigKeyPath("api.estimatedStartupTimeInMinutes");
const ConfigKeyPath QUERY_MATERIALIZATION_CUTOFF_OPTION_KEY =
YamlFile::stringToConfigKeyPath("query.materializationCutoff");

const ConfigSpecification RUNTIME_CONFIG_SPECIFICATION{
"siloServer",
Expand Down
21 changes: 14 additions & 7 deletions src/silo/config/util/yaml_file.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,14 @@ TEST(YamlFile, canCorrectlyCheckForPresentPropertiesCaseSensitively) {
const YamlFile under_test = YamlFile::readFile("./testBaseData/test_preprocessing_config.yaml");

const std::unordered_map<ConfigKeyPath, YAML::Node> expected_result{
{YamlFile::stringToConfigKeyPath("inputDirectory"), YAML::Node{"./testBaseData/exampleDataset/"}},
{YamlFile::stringToConfigKeyPath("inputDirectory"),
YAML::Node{"./testBaseData/exampleDataset/"}},
{YamlFile::stringToConfigKeyPath("outputDirectory"), YAML::Node{"./output/"}},
{YamlFile::stringToConfigKeyPath("metadataFilename"), YAML::Node{"small_metadata_set.tsv"}},
{YamlFile::stringToConfigKeyPath("pangoLineageDefinitionFilename"), YAML::Node{"pangolineage_alias.json"}},
{YamlFile::stringToConfigKeyPath("referenceGenomeFilename"), YAML::Node{"reference_genomes.json"}},
{YamlFile::stringToConfigKeyPath("pangoLineageDefinitionFilename"),
YAML::Node{"pangolineage_alias.json"}},
{YamlFile::stringToConfigKeyPath("referenceGenomeFilename"),
YAML::Node{"reference_genomes.json"}},
};

ASSERT_EQ(under_test.getYamlFields(), expected_result);
Expand All @@ -32,12 +35,16 @@ TEST(YamlFile, shouldReadAnotherConfig) {
YamlFile::readFile("./testBaseData/test_preprocessing_config_with_overridden_defaults.yaml");

const std::unordered_map<ConfigKeyPath, YAML::Node> expected_result{
{YamlFile::stringToConfigKeyPath("inputDirectory"), YAML::Node{"./testBaseData/exampleDataset/"}},
{YamlFile::stringToConfigKeyPath("inputDirectory"),
YAML::Node{"./testBaseData/exampleDataset/"}},
{YamlFile::stringToConfigKeyPath("outputDirectory"), YAML::Node{"./output/custom/"}},
{YamlFile::stringToConfigKeyPath("intermediateResultsDirectory"), YAML::Node{"./output/overriddenTemp/"}},
{YamlFile::stringToConfigKeyPath("intermediateResultsDirectory"),
YAML::Node{"./output/overriddenTemp/"}},
{YamlFile::stringToConfigKeyPath("metadataFilename"), YAML::Node{"small_metadata_set.tsv"}},
{YamlFile::stringToConfigKeyPath("pangoLineageDefinitionFilename"), YAML::Node{"pangolineage_alias.json"}},
{YamlFile::stringToConfigKeyPath("referenceGenomeFilename"), YAML::Node{"reference_genomes.json"}},
{YamlFile::stringToConfigKeyPath("pangoLineageDefinitionFilename"),
YAML::Node{"pangolineage_alias.json"}},
{YamlFile::stringToConfigKeyPath("referenceGenomeFilename"),
YAML::Node{"reference_genomes.json"}},
{YamlFile::stringToConfigKeyPath("genePrefix"), YAML::Node{"aaSeq_"}},
{YamlFile::stringToConfigKeyPath("nucleotideSequencePrefix"), YAML::Node{""}},
};
Expand Down
4 changes: 3 additions & 1 deletion src/silo_api/command_line_arguments.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ TEST(CommandLineArguments, correctUnixOptionString) {
ASSERT_EQ(CommandLineArguments::configKeyPathToString({{{"", "A"}}}), "-a");
ASSERT_EQ(CommandLineArguments::configKeyPathToString({{{"abc"}}}), "abc");
ASSERT_EQ(CommandLineArguments::configKeyPathToString({{{"someCamelCase"}}}), "some-camel-case");
ASSERT_EQ(CommandLineArguments::configKeyPathToString({{{"BADCamelCase"}}}), "-b-a-d-camel-case");
ASSERT_EQ(
CommandLineArguments::configKeyPathToString({{{"BADCamelCase"}}}), "-b-a-d-camel-case"
);
ASSERT_EQ(
CommandLineArguments::configKeyPathToString({{{"something_with_underscores"}}}),
"something_with_underscores"
Expand Down

0 comments on commit cb6c782

Please sign in to comment.