Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create the optimizer framework #219

Merged
merged 3 commits into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions libursa/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ add_library(
QueryParser.h
QueryResult.cpp
QueryResult.h
QueryOptimizer.cpp
QueryOptimizer.h
RawFile.cpp
RawFile.h
Responses.cpp
Expand Down
4 changes: 3 additions & 1 deletion libursa/OnDiskDataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "DatabaseName.h"
#include "Json.h"
#include "Query.h"
#include "QueryOptimizer.h"
#include "spdlog/fmt/ostr.h"
#include "spdlog/spdlog.h"

Expand Down Expand Up @@ -91,7 +92,8 @@ void OnDiskDataset::execute(const Query &query, ResultWriter *out,
for (const auto &ndx : get_indexes()) {
types_to_query.emplace(ndx.index_type());
}
const Query plan = query.plan(types_to_query);
Query plan = query.plan(types_to_query);
plan = q_optimize(std::move(plan));
spdlog::debug("PLAN: {}", plan);

QueryResult result = this->query(plan, counters);
Expand Down
7 changes: 7 additions & 0 deletions libursa/Query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,14 @@ const std::vector<Query> &Query::as_queries() const {
type != QueryType::MIN_OF) {
throw std::runtime_error("This query doesn\'t contain subqueries.");
}
return queries;
}

// Mutable access to the subqueries of a compound query.
// Only AND, OR and MIN_OF queries carry subqueries; any other query type
// results in a std::runtime_error.
std::vector<Query> &Query::as_queries() {
    const bool has_subqueries = type == QueryType::AND ||
                                type == QueryType::OR ||
                                type == QueryType::MIN_OF;
    if (has_subqueries) {
        return queries;
    }
    throw std::runtime_error("This query doesn\'t contain subqueries.");
}

Expand Down
5 changes: 3 additions & 2 deletions libursa/Query.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ class PrimitiveQuery {
PrimitiveQuery(IndexType itype, TriGram trigram)
: itype(itype), trigram(trigram) {}

const IndexType itype;
const TriGram trigram;
IndexType itype;
TriGram trigram;

// We want to use PrimitiveQuery in STL containers, and this means they
// must be comparable using <. Specific order doesn't matter.
Expand Down Expand Up @@ -51,6 +51,7 @@ class Query {
Query &operator=(Query &&) = default;

const std::vector<Query> &as_queries() const;
std::vector<Query> &as_queries();
const QString &as_value() const;
uint32_t as_count() const;
std::string as_string_repr() const;
Expand Down
31 changes: 31 additions & 0 deletions libursa/QueryOptimizer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include "QueryOptimizer.h"

#include <vector>

// Runs the optimization passes on every direct subquery of `q` and rebuilds
// a query of the same type from the optimized children.
//
// q_optimize() is applied to each child, so after this step every subquery
// should already be maximally optimized and there should be no need to run
// this in a loop.
//
// `q` is consumed: its subqueries are moved out of it. The caller
// (q_optimize) ensures the query type is not PRIMITIVE; as_queries() throws
// std::runtime_error for query types that do not contain subqueries.
Query simplify_subqueries(Query &&q) {
    auto &subqueries = q.as_queries();

    std::vector<Query> newqueries;
    // The final size is known up front, so allocate once.
    newqueries.reserve(subqueries.size());
    for (auto &subquery : subqueries) {
        newqueries.emplace_back(q_optimize(std::move(subquery)));
    }

    if (q.get_type() == QueryType::MIN_OF) {
        // MIN_OF carries a count in addition to its children, so it is
        // rebuilt through q_min_of rather than the generic constructor.
        return q_min_of(q.as_count(), std::move(newqueries));
    }
    // Return the prvalue directly: wrapping a temporary in std::move()
    // prevents copy elision and forces an extra move construction.
    return Query(q.get_type(), std::move(newqueries));
}

// Entry point of the query optimizer: consumes `q` and returns its
// optimized form.
Query q_optimize(Query &&q) {
    // A PRIMITIVE query has nothing to improve and is handed back untouched;
    // everything else gets its subqueries optimized first.
    if (q.get_type() != QueryType::PRIMITIVE) {
        q = simplify_subqueries(std::move(q));

        // Optimization passes will be added here later.
    }

    // `q` is a named rvalue reference, so the explicit move is needed to
    // move (rather than copy) it into the return value.
    return std::move(q);
}
8 changes: 8 additions & 0 deletions libursa/QueryOptimizer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#pragma once

#include "Query.h"

// Optimizes a query, and returns the optimized version.
// Optimizations try to simplify the expression in various ways to make the
// execution faster - for example by enabling short-circuiting in some places.
//
// The query is taken by rvalue reference and consumed: callers pass it with
// std::move() and must use the returned Query afterwards, e.g.
//     plan = q_optimize(std::move(plan));
// PRIMITIVE queries are returned unchanged; for all other query types the
// subqueries are optimized recursively.
Query q_optimize(Query &&query);
5 changes: 3 additions & 2 deletions libursa/Version.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ constexpr std::string_view ursadb_format_version = "1.5.0";

// Project version.
// Consider updating the version tag when doing PRs.
constexpr std::string_view ursadb_version_string =
"@PROJECT_VERSION@+debuglogs";
// clang-format off
constexpr std::string_view ursadb_version_string = "@PROJECT_VERSION@+opt0";
// clang-format on
Loading