Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: new operator for matching strings against regular expressions #534

Merged
merged 3 commits into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ find_package(spdlog REQUIRED)
find_package(TBB REQUIRED)
find_package(yaml-cpp REQUIRED)
find_package(zstd REQUIRED)
find_package(re2 REQUIRED)

# ---------------------------------------------------------------------------
# Includes
Expand Down Expand Up @@ -97,6 +98,7 @@ target_link_libraries(
Poco::Net
Poco::Util
Poco::JSON
re2::re2
)

add_executable(siloApi "${CMAKE_SOURCE_DIR}/src/silo_api/api.cpp" $<TARGET_OBJECTS:silolib>)
Expand Down
8 changes: 8 additions & 0 deletions conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ class SiloRecipe(ConanFile):
"spdlog/1.14.1",
"yaml-cpp/0.7.0",
"zstd/1.5.5",
"re2/20240702",
"abseil/20240116.1",
]

default_options = {
Expand Down Expand Up @@ -90,6 +92,10 @@ class SiloRecipe(ConanFile):
"poco/*:enable_redis": False,
"poco/*:enable_xml": False,
"poco/*:enable_zip": False,

"re2/*:shared": False,

"absl/*:shared": False,
}

def generate(self):
Expand All @@ -107,4 +113,6 @@ def generate(self):
deps.set_property("spdlog", "cmake_find_mode", "both")
deps.set_property("yaml-cpp", "cmake_find_mode", "both")
deps.set_property("zstd", "cmake_find_mode", "both")
deps.set_property("re2", "cmake_find_mode", "both")
deps.set_property("abseil", "cmake_find_mode", "both")
deps.generate()
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"testCaseName": "StringSearch that wants to match a non-existing column",
"query": {
"action": {
"type": "Aggregated"
},
"filterExpression": {
"type": "StringSearch",
"column": "this_column_does_not_exist",
"searchExpression": "test"
}
},
"expectedError": {
"error": "Bad request",
"message": "The database does not contain the string column 'this_column_does_not_exist'"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"testCaseName": "StringSearch that wants to match a non-string column",
"query": {
"action": {
"type": "Aggregated"
},
"filterExpression": {
"type": "StringSearch",
"column": "age",
"searchExpression": "test"
}
},
"expectedError": {
"error": "Bad request",
"message": "The database does not contain the string column 'age'"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"testCaseName": "StringSearch that contains an invalid regex",
"query": {
"action": {
"type": "Aggregated"
},
"filterExpression": {
"type": "StringSearch",
"column": "gisaid_epi_isl",
"searchExpression": "\\"
}
},
"expectedError": {
"error": "Bad request",
"message": "Invalid Regular Expression. The parsing of the regular expression failed with the error 'trailing \\'. See https://github.com/google/re2/wiki/Syntax for a Syntax specification."
}
}
21 changes: 21 additions & 0 deletions endToEndTests/test/queries/stringSearch_basic_regex.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"testCaseName": "StringSearch with a basic regex",
"query": {
"action": {
"type": "Aggregated"
},
"filterExpression": {
"type": "Not",
"child": {
"type": "StringSearch",
"column": "gisaid_epi_isl",
"searchExpression": "EPI"
}
}
},
"expectedQueryResult": [
{
"count": 0
}
]
}
18 changes: 18 additions & 0 deletions endToEndTests/test/queries/stringSearch_digitAmount.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"testCaseName": "StringSearch that matches the primary key to end with exactly six digits",
"query": {
"action": {
"type": "Aggregated"
},
"filterExpression": {
"type": "StringSearch",
"column": "gisaid_epi_isl",
"searchExpression": "^\\D*\\d{6}$"
}
},
"expectedQueryResult": [
{
"count": 12
}
]
}
18 changes: 18 additions & 0 deletions endToEndTests/test/queries/stringSearch_justAString.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"testCaseName": "StringSearch that matches exactly a string",
"query": {
"action": {
"type": "Aggregated"
},
"filterExpression": {
"type": "StringSearch",
"column": "division",
"searchExpression": "^Aargau$"
}
},
"expectedQueryResult": [
{
"count": 6
}
]
}
18 changes: 18 additions & 0 deletions endToEndTests/test/queries/stringSearch_prefix.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"testCaseName": "StringSearch with a regex matching the prefix",
"query": {
"action": {
"type": "Aggregated"
},
"filterExpression": {
"type": "StringSearch",
"column": "gisaid_epi_isl",
"searchExpression": "^EP"
}
},
"expectedQueryResult": [
{
"count": 100
}
]
}
12 changes: 6 additions & 6 deletions endToEndTests/test/query.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,12 @@ describe('The /query endpoint', () => {
);
invalidQueryTestCases.forEach(testCase =>
it('should return the expected error for the test case ' + testCase.testCaseName, async () => {
const response = await server
.post('/query')
.send(testCase.query)
.expect(400)
.expect('Content-Type', 'application/json');
return expect(response.body).to.deep.equal(testCase.expectedError);
const response = await server.post('/query').send(testCase.query);

const errorMessage = 'Actual result is:\n' + response.text + '\n';
expect(response.status, errorMessage).to.equal(400);
expect(response.header['content-type'], errorMessage).to.equal('application/json');
return expect(response.body, errorMessage).to.deep.equal(testCase.expectedError);
})
);

Expand Down
4 changes: 2 additions & 2 deletions include/silo/query_engine/filter_expressions/bool_equals.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ using silo::common::OptionalBool;

struct BoolEquals : public Expression {
private:
std::string column;
std::string column_name;
OptionalBool value;

public:
explicit BoolEquals(std::string column, OptionalBool value);
explicit BoolEquals(std::string column_name, OptionalBool value);

[[nodiscard]] std::string toString() const override;

Expand Down
4 changes: 2 additions & 2 deletions include/silo/query_engine/filter_expressions/date_between.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ namespace silo::query_engine::filter_expressions {

class DateBetween : public Expression {
private:
std::string column;
std::string column_name;
std::optional<silo::common::Date> date_from;
std::optional<silo::common::Date> date_to;

Expand All @@ -46,7 +46,7 @@ class DateBetween : public Expression {

public:
explicit DateBetween(
std::string column,
std::string column_name,
std::optional<silo::common::Date> date_from,
std::optional<silo::common::Date> date_to
);
Expand Down
8 changes: 6 additions & 2 deletions include/silo/query_engine/filter_expressions/float_between.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,16 @@ namespace silo::query_engine::filter_expressions {

class FloatBetween : public Expression {
private:
std::string column;
std::string column_name;
std::optional<double> from;
std::optional<double> to;

public:
explicit FloatBetween(std::string column, std::optional<double> from, std::optional<double> to);
explicit FloatBetween(
std::string column_name,
std::optional<double> from,
std::optional<double> to
);

std::string toString() const override;

Expand Down
4 changes: 2 additions & 2 deletions include/silo/query_engine/filter_expressions/float_equals.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ namespace silo::query_engine::filter_expressions {

class FloatEquals : public Expression {
private:
std::string column;
std::string column_name;
double value;

public:
FloatEquals(std::string column, double value);
FloatEquals(std::string column_name, double value);

std::string toString() const override;

Expand Down
4 changes: 2 additions & 2 deletions include/silo/query_engine/filter_expressions/int_between.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ namespace silo::query_engine::filter_expressions {

class IntBetween : public Expression {
private:
std::string column;
std::string column_name;
std::optional<uint32_t> from;
std::optional<uint32_t> to;

public:
explicit IntBetween(
std::string column,
std::string column_name,
std::optional<uint32_t> from,
std::optional<uint32_t> to
);
Expand Down
4 changes: 2 additions & 2 deletions include/silo/query_engine/filter_expressions/int_equals.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ namespace silo::query_engine::filter_expressions {

class IntEquals : public Expression {
private:
std::string column;
std::string column_name;
uint32_t value;

public:
explicit IntEquals(std::string column, uint32_t value);
explicit IntEquals(std::string column_name, uint32_t value);

[[nodiscard]] std::string toString() const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ class Operator;
namespace silo::query_engine::filter_expressions {

class PangoLineageFilter : public Expression {
std::string column;
std::string column_name;
std::string lineage;
bool include_sublineages;

public:
explicit PangoLineageFilter(
std::string column,
std::string column_name,
std::string lineage_key,
bool include_sublineages
);
Expand Down
4 changes: 2 additions & 2 deletions include/silo/query_engine/filter_expressions/string_equals.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ namespace silo::query_engine::filter_expressions {

class StringEquals : public Expression {
private:
std::string column;
std::string column_name;
std::string value;

public:
explicit StringEquals(std::string column, std::string value);
explicit StringEquals(std::string column_name, std::string value);

std::string toString() const override;

Expand Down
43 changes: 43 additions & 0 deletions include/silo/query_engine/filter_expressions/string_search.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#pragma once

#include <memory>
#include <string>

#include <re2/re2.h>
#include <nlohmann/json_fwd.hpp>

#include "silo/query_engine/filter_expressions/expression.h"

// Forward declarations of the types that this header only uses by reference or
// as a smart-pointer pointee, so their full definitions are not needed here.
namespace silo {
class Database;
class DatabasePartition;
namespace query_engine {
namespace operators {
class Operator;
} // namespace operators
} // namespace query_engine
} // namespace silo

namespace silo::query_engine::filter_expressions {

/// Filter expression that pairs a column name with an RE2 regular expression.
///
/// NOTE(review): presumably selects the rows whose value in the named string
/// column matches the regular expression -- confirm against the implementation
/// in the corresponding .cpp file, which is not visible from this header.
class StringSearch : public Expression {
private:
// Name of the column this filter refers to.
std::string column_name;
// Owned RE2 regular expression object supplied by the caller at construction.
std::unique_ptr<re2::RE2> search_expression;

public:
/// Constructs the filter from a column name and an already-built RE2 object.
///
/// @param column_name        name of the column this filter refers to
/// @param search_expression  RE2 object; ownership is transferred to this filter
explicit StringSearch(std::string column_name, std::unique_ptr<re2::RE2> search_expression);

/// Returns a string representation of this expression (overrides Expression).
std::string toString() const override;

/// Compiles this expression into an Operator (overrides Expression).
///
/// @param database            the database the query runs against
/// @param database_partition  the partition to compile the operator for
/// @param mode                ambiguity mode forwarded from the caller
/// @return an owning pointer to the resulting operator
[[nodiscard]] std::unique_ptr<silo::query_engine::operators::Operator> compile(
const Database& database,
const DatabasePartition& database_partition,
AmbiguityMode mode
) const override;
};

// Deserialization hook: fills `filter` with a StringSearch built from `json`.
// NOTE(review): the snake_case name presumably follows the nlohmann::json
// `from_json` lookup convention, which is why the naming lint is suppressed
// below -- confirm against the definition in the .cpp file.
// NOLINTNEXTLINE(readability-identifier-naming)
void from_json(const nlohmann::json& json, std::unique_ptr<StringSearch>& filter);

} // namespace silo::query_engine::filter_expressions
Loading