Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Backport a coroutine from the Bind class #1803

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 45 additions & 14 deletions src/engine/Bind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,45 @@ IdTable Bind::cloneSubView(const IdTable& idTable,
return result;
}

// Anonymous namespace: This class is not exposed outside this translation unit.
namespace {
// An input range that lazily applies a `BIND` operation to a fully materialized
// subresult. The subresult is split into consecutive chunks of at most
// `chunkSize_` rows; each call to `get()` clones the next chunk, hands it to
// `applyBind_` together with a fresh copy of the subresult's local vocab, and
// yields the resulting `IdTableVocabPair`.
template <typename ApplyBind>
struct LazyBindForMaterializedInput
    : ad_utility::InputRangeFromGet<Result::IdTableVocabPair> {
  // Callable that performs the actual `BIND` on one chunk. It is invoked as
  // `applyBind_(IdTable, LocalVocab*)` and its return value is stored in an
  // `IdTable`.
  ApplyBind applyBind_;
  // The materialized input. Holding the `shared_ptr` keeps the input alive
  // for as long as this range exists.
  std::shared_ptr<const Result> result_;
  // Maximum number of rows per yielded chunk (always at least 1).
  size_t chunkSize_;
  // Total number of rows in `result_->idTable()`, cached at construction.
  size_t size_;
  // Index of the first row that has not been yielded yet.
  size_t offset_ = 0;

  // Constructor. The `ApplyBind` function performs the actual `Bind`.
  // `result` must not be null, as its `idTable()` is read immediately.
  // A `chunkSize` of 0 is raised to 1, because otherwise `get()` would never
  // advance and would yield empty chunks forever.
  LazyBindForMaterializedInput(ApplyBind applyBind,
                               std::shared_ptr<const Result> result,
                               size_t chunkSize)
      : applyBind_(std::move(applyBind)),
        result_{std::move(result)},
        chunkSize_{std::max<size_t>(chunkSize, 1)},
        // NOTE: Reads the member `result_` (not the moved-from parameter).
        // This is valid because `result_` is declared before `size_`.
        size_{result_->idTable().size()} {}

  // The `get` function that is needed for the `InputRangeFromGet`. Returns
  // the result of applying the `BIND` to the next chunk, or `std::nullopt`
  // once all rows have been consumed.
  std::optional<Result::IdTableVocabPair> get() override {
    if (offset_ >= size_) {
      return std::nullopt;
    }
    const size_t begin = offset_;
    // Computing the end as `begin + min(chunk, remaining)` cannot wrap around,
    // unlike `min(size_, begin + chunkSize_)` for very large chunk sizes.
    const size_t end = begin + std::min(chunkSize_, size_ - begin);
    offset_ = end;
    // Each chunk gets its own copy of the input's local vocab, to which
    // `applyBind_` may add entries via the pointer it receives.
    LocalVocab outVocab = result_->getCopyOfLocalVocab();
    IdTable idTable = applyBind_(
        Bind::cloneSubView(result_->idTable(), {begin, end}), &outVocab);
    return Result::IdTableVocabPair{std::move(idTable), std::move(outVocab)};
  }
};
} // namespace

// _____________________________________________________________________________
ProtoResult Bind::computeResult(bool requestLaziness) {
_subtree->setLimit(getLimit());
Expand All @@ -108,21 +147,13 @@ ProtoResult Bind::computeResult(bool requestLaziness) {

if (subRes->isFullyMaterialized()) {
if (requestLaziness && subRes->idTable().size() > CHUNK_SIZE) {
return {
[](auto applyBind,
std::shared_ptr<const Result> result) -> Result::Generator {
size_t size = result->idTable().size();
for (size_t offset = 0; offset < size; offset += CHUNK_SIZE) {
LocalVocab outVocab = result->getCopyOfLocalVocab();
IdTable idTable = applyBind(
cloneSubView(result->idTable(),
{offset, std::min(size, offset + CHUNK_SIZE)}),
&outVocab);
co_yield {std::move(idTable), std::move(outVocab)};
}
}(std::move(applyBind), std::move(subRes)),
resultSortedOn()};
// `LazyBindForMaterializedInput` is the actual implementation; the
// `LazyResult` wraps it in a type-erased way.
return {Result::LazyResult{LazyBindForMaterializedInput{
std::move(applyBind), std::move(subRes), CHUNK_SIZE}},
resultSortedOn()};
}

// Make a deep copy of the local vocab from `subRes` and then add to it (in
// case BIND adds a new word or words).
//
Expand Down
4 changes: 4 additions & 0 deletions src/engine/Bind.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,13 @@ class Bind : public Operation {
private:
ProtoResult computeResult(bool requestLaziness) override;

public:
// Build a new `IdTable` from the rows of `idTable` selected by `subrange`.
// NOTE(review): the caller in `Bind.cpp` passes `{begin, end}` row offsets
// with `end` clamped to the table size, so the range is presumably half-open
// `[begin, end)` -- confirm against the definition.
//
// This function has to be public because a free struct in the anonymous
// namespace of the `.cpp` file calls it.
static IdTable cloneSubView(const IdTable& idTable,
const std::pair<size_t, size_t>& subrange);

private:
// Implementation for the binding of arbitrary expressions.
IdTable computeExpressionBind(
LocalVocab* localVocab, IdTable idTable,
Expand Down
4 changes: 4 additions & 0 deletions src/engine/Result.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ Result::Result(IdTableVocabPair pair, std::vector<ColumnIndex> sortedBy)

// _____________________________________________________________________________
// Construct from a `Generator`: wrap it in a `LazyResult` and delegate to the
// constructor that takes a `LazyResult`, so both share one implementation.
Result::Result(Generator idTables, std::vector<ColumnIndex> sortedBy)
: Result{LazyResult{std::move(idTables)}, std::move(sortedBy)} {}

// _____________________________________________________________________________
Result::Result(LazyResult idTables, std::vector<ColumnIndex> sortedBy)
: data_{GenContainer{[](auto idTables, auto sortedBy) -> Generator {
std::optional<IdTable::row_type> previousId = std::nullopt;
for (IdTableVocabPair& pair : idTables) {
Expand Down
2 changes: 2 additions & 0 deletions src/engine/Result.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ class Result {
LocalVocab&& localVocab);
// Construct from a single `IdTableVocabPair`; `sortedBy` lists the columns
// the result is sorted on.
Result(IdTableVocabPair pair, std::vector<ColumnIndex> sortedBy);
// Construct from a `Generator` of id tables. The definition wraps the
// generator in a `LazyResult` and forwards to the overload below.
Result(Generator idTables, std::vector<ColumnIndex> sortedBy);
// Construct from a `LazyResult`, i.e. a range of `IdTableVocabPair`s.
Result(LazyResult idTables, std::vector<ColumnIndex> sortedBy);

// Prevent accidental copying of a result table.
Result(const Result& other) = delete;
Result& operator=(const Result& other) = delete;
Expand Down
5 changes: 5 additions & 0 deletions src/util/Iterators.h
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,11 @@ class InputRangeFromGet {

public:
// Virtual destructor, so that objects of derived classes can be destroyed
// through a pointer or reference to this base class.
virtual ~InputRangeFromGet() = default;
// A user-declared destructor suppresses the implicitly generated move
// operations, and user-declared constructors suppress the implicit default
// constructor. All of them are therefore defaulted explicitly, so that the
// class keeps its default, move, and copy operations.
InputRangeFromGet() = default;
InputRangeFromGet(InputRangeFromGet&&) = default;
InputRangeFromGet& operator=(InputRangeFromGet&&) = default;
InputRangeFromGet(const InputRangeFromGet&) = default;
InputRangeFromGet& operator=(const InputRangeFromGet&) = default;

// Get the next value by calling `get()` and store it in `storage_`,
// overwriting whatever was stored there before.
void getNextAndStore() { storage_ = get(); }
Expand Down
Loading