Skip to content

Commit a28c834

Browse files
committed
Modernize exception handling in parallel solve.
* On catch, store std::current_exception() and rethrow later on join.
1 parent 48e8bb7 commit a28c834

File tree

3 files changed

+47
-49
lines changed

3 files changed

+47
-49
lines changed

clasp/mt/parallel_solve.h

+8-7
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ class ParallelSolve : public SolveAlgorithm {
139139
// Thread setup
140140
void destroyThread(uint32_t id);
141141
void allocThread(uint32_t id, Solver& s);
142-
int joinThreads();
142+
void joinThreads();
143143
// -------------------------------------------------------------------------------------------
144144
// Algorithm steps
145145
void setIntegrate(uint32_t grace, uint8_t filter);
@@ -150,13 +150,13 @@ class ParallelSolve : public SolveAlgorithm {
150150
auto doNext(Val_t last) -> Val_t override;
151151
void doStop() override;
152152
void doDetach() override;
153-
bool doInterrupt() override;
153+
bool doInterrupt() noexcept override;
154154
void solveParallel(uint32_t id);
155155
void initQueue();
156156
bool requestWork(Solver& s, Path& out);
157157
void terminate(const Solver& s, bool complete);
158158
bool waitOnSync(const Solver& s);
159-
void exception(uint32_t id, Path path, int err, const char* what);
159+
void exception(uint32_t id, Path path, bool oom) noexcept;
160160
void reportProgress(const Event& ev) const;
161161
void reportProgress(const Solver& s, const char* msg) const;
162162
// -------------------------------------------------------------------------------------------
@@ -209,11 +209,12 @@ class ParallelHandler final : public MessageHandler {
209209
//! Removes this object from the list of post propagators of its solver and detaches the solver from ctx.
210210
void detach(SharedContext& ctx, bool fastExit);
211211

212-
bool setError(int e);
213-
[[nodiscard]] int error() const;
214-
void setWinner();
212+
[[nodiscard]] bool error() const;
215213
[[nodiscard]] bool winner() const;
216214

215+
void setError() noexcept;
216+
void setWinner() noexcept;
217+
217218
//! True if *this has an associated thread of execution, false otherwise.
218219
[[nodiscard]] bool joinable() const;
219220
void setThread(Clasp::mt::thread x);
@@ -299,7 +300,7 @@ class ParallelHandler final : public MessageHandler {
299300
ClauseDB integrated_; // integrated clauses
300301
uint32_t recEnd_; // where to put next received clause
301302
uint32_t intEnd_; // where to put next clause
302-
uint32_t error_ : 28; // error code or 0 if ok
303+
uint32_t error_ : 1; // 1 if thread terminated with error
303304
uint32_t win_ : 1; // 1 if thread was the first to terminate the search
304305
uint32_t up_ : 1; // 1 if next propagate should check for new lemmas/models
305306
uint32_t act_ : 1; // 1 if gp is active

src/parallel_solve.cpp

+34-36
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ struct ParallelSolve::SharedData {
8686
void reset(SharedContext& a_ctx) {
8787
clearQueue();
8888
syncT.reset();
89-
msg.clear();
9089
globalR.reset();
9190
discardVec(path);
9291
maxConflict = globalR.current();
@@ -99,7 +98,7 @@ struct ParallelSolve::SharedData {
9998
workReq = 0;
10099
restartReq = 0;
101100
generator = nullptr;
102-
errorCode = 0;
101+
error = nullptr;
103102
}
104103
void clearQueue() {
105104
workQ.first.clear();
@@ -207,7 +206,6 @@ struct ParallelSolve::SharedData {
207206
bool setControl(uint32_t flags) { return (control.fetch_or(flags) & flags) != flags; }
208207
bool clearControl(uint32_t flags) { return (control.fetch_and(~flags) & flags) == flags; }
209208
using GeneratorPtr = std::unique_ptr<Generator>;
210-
std::string msg; // global error message
211209
ScheduleStrategy globalR; // global restart strategy
212210
LitVec path; // initial guiding path - typically empty
213211
uint64_t maxConflict{0}; // current restart limit
@@ -228,7 +226,7 @@ struct ParallelSolve::SharedData {
228226
std::atomic<uint32_t> restartReq{0}; // == numThreads(): restart
229227
std::atomic<uint32_t> control{0}; // set of active message flags
230228
std::atomic<uint32_t> modCount{0}; // counter for synchronizing models
231-
int32_t errorCode{0}; // global error code
229+
std::exception_ptr error{nullptr}; // global exception
232230
};
233231

234232
// post message to all threads
@@ -381,7 +379,7 @@ inline void ParallelSolve::reportProgress(const Solver& s, const char* msg) cons
381379
}
382380

383381
// joins with and destroys all active threads
384-
int ParallelSolve::joinThreads() {
382+
void ParallelSolve::joinThreads() {
385383
uint32_t winner = thread_[master_id]->winner() ? master_id : UINT32_MAX;
386384
// detach master only after all client threads are done
387385
for (uint32_t i : irange(1u, shared_->nextId)) {
@@ -404,7 +402,6 @@ int ParallelSolve::joinThreads() {
404402
shared_->nextId = 1;
405403
shared_->syncT.stop();
406404
reportProgress(MessageEvent(*shared_->ctx->master(), "TERMINATE", MessageEvent::completed, shared_->syncT.total()));
407-
return not shared_->interrupt() ? thread_[master_id]->error() : shared_->errorCode;
408405
}
409406

410407
void ParallelSolve::doStart(SharedContext& ctx, LitView assume) {
@@ -434,10 +431,13 @@ void ParallelSolve::doStop() {
434431
shared_->generator->notify(SharedData::Generator::done);
435432
thread_[master_id]->join();
436433
}
437-
int err = joinThreads();
434+
joinThreads();
438435
shared_->generator = nullptr;
439436
shared_->ctx->distributor.reset(nullptr);
440-
POTASSCO_CHECK(err == 0, err, "%s", shared_->msg.c_str());
437+
if (shared_->error && (shared_->interrupt() || thread_[master_id]->error())) {
438+
std::rethrow_exception(shared_->error);
439+
}
440+
assert(not thread_[master_id]->error());
441441
}
442442

443443
void ParallelSolve::doDetach() {
@@ -486,16 +486,10 @@ void ParallelSolve::solveParallel(uint32_t id) {
486486
}
487487
}
488488
catch (const std::bad_alloc&) {
489-
exception(id, std::move(a), ENOMEM, "bad alloc");
490-
}
491-
catch (const std::logic_error& e) {
492-
exception(id, std::move(a), Potassco::to_underlying(Potassco::Errc::invalid_argument), e.what());
493-
}
494-
catch (const std::exception& e) {
495-
exception(id, std::move(a), Potassco::to_underlying(std::errc::interrupted), e.what());
489+
exception(id, std::move(a), true);
496490
}
497491
catch (...) {
498-
exception(id, std::move(a), Potassco::to_underlying(std::errc::interrupted), "unknown");
492+
exception(id, std::move(a), false);
499493
}
500494
assert(shared_->terminate() || thread_[id]->error());
501495
auto remaining = shared_->leaveAlgorithm();
@@ -513,28 +507,36 @@ void ParallelSolve::solveParallel(uint32_t id) {
513507
}
514508
}
515509

516-
void ParallelSolve::exception(uint32_t id, Path path, int e, const char* what) {
517-
try {
518-
if (not thread_[id]->setError(e) || e != ENOMEM || id == master_id) {
519-
ParallelSolve::doInterrupt();
510+
void ParallelSolve::exception(uint32_t id, Path path, bool oom) noexcept {
511+
for (thread_[id]->setError();;) {
512+
if (id == master_id || not oom) {
520513
if (shared_->errorSet.fetch_or(Potassco::nth_bit<uint64_t>(id)) == 0) {
521-
shared_->errorCode = e;
522-
shared_->msg.append(1, '[').append(std::to_string(id)).append("]: ").append(what);
514+
shared_->error = std::current_exception();
515+
}
516+
ParallelSolve::doInterrupt();
517+
break;
518+
}
519+
try {
520+
if (path.owner() && shared_->allowSplit()) {
521+
shared_->pushWork(std::move(path));
523522
}
523+
break;
524524
}
525-
else if (path.owner() && shared_->allowSplit()) {
526-
shared_->pushWork(std::move(path));
525+
catch (...) { // we failed to push back the path and therefore can't continue
526+
oom = false;
527+
path = {};
527528
}
528-
reportProgress(thread_[id]->solver(),
529-
e == ENOMEM ? "Thread failed with out of memory" : "Thread failed with error");
530529
}
531-
catch (...) {
532-
ParallelSolve::doInterrupt();
530+
try {
531+
reportProgress(thread_[id]->solver(), oom ? "Thread failed with out of memory" : "Thread failed with error");
532+
}
533+
catch (...) { // NOLINT(bugprone-empty-catch)
534+
// ignore exception from progress
533535
}
534536
}
535537

536538
// forced termination from outside
537-
bool ParallelSolve::doInterrupt() {
539+
bool ParallelSolve::doInterrupt() noexcept {
538540
// do not notify blocked threads to avoid possible
539541
// deadlock in semaphore!
540542
shared_->postMessage(SharedData::msg_interrupt, false);
@@ -863,14 +865,10 @@ void ParallelHandler::setThread(Clasp::mt::thread x) {
863865
assert(not joinable() && x.joinable());
864866
thread_ = std::move(x);
865867
}
866-
bool ParallelHandler::setError(int code) {
867-
error_ = static_cast<uint32_t>(code);
868-
return thread_.joinable() && not winner();
869-
}
870-
871-
void ParallelHandler::setWinner() { win_ = 1; }
868+
void ParallelHandler::setError() noexcept { error_ = 1; }
869+
void ParallelHandler::setWinner() noexcept { win_ = 1; }
872870
bool ParallelHandler::winner() const { return win_ != 0; }
873-
int ParallelHandler::error() const { return static_cast<int>(error_); }
871+
bool ParallelHandler::error() const { return error_ != 0; }
874872
bool ParallelHandler::joinable() const { return thread_.joinable(); }
875873

876874
void ParallelHandler::clearDB(Solver* s) {

tests/facade_test.cpp

+5-6
Original file line numberDiff line numberDiff line change
@@ -1413,18 +1413,19 @@ TEST_CASE("Facade mt", "[facade][mt]") {
14131413
EventVar* ev;
14141414
};
14151415
libclasp.ctx.master()->addPost(new Blocker(ev));
1416+
struct ModelError : std::exception {};
14161417
struct Handler : EventHandler {
14171418
bool onModel(const Solver& s, const Model&) override {
14181419
if (&s != s.sharedContext()->master()) {
14191420
ev->fire();
1420-
throw std::runtime_error("Model from thread");
1421+
throw ModelError{};
14211422
}
14221423
return false;
14231424
}
14241425
EventVar* ev = nullptr;
14251426
} h;
14261427
h.ev = &ev;
1427-
REQUIRE_THROWS_AS(libclasp.solve(SolveMode::def, LitVec(), &h), std::runtime_error);
1428+
REQUIRE_THROWS_AS(libclasp.solve(SolveMode::def, LitVec(), &h), ModelError);
14281429
}
14291430
SECTION("throw on propagate") {
14301431
struct Blocker : PostPropagator {
@@ -1442,9 +1443,7 @@ TEST_CASE("Facade mt", "[facade][mt]") {
14421443
if (et == alloc) {
14431444
throw std::bad_alloc();
14441445
}
1445-
else {
1446-
throw std::logic_error("Something happened");
1447-
}
1446+
throw std::domain_error("Something happened");
14481447
}
14491448
return true;
14501449
}
@@ -1459,7 +1458,7 @@ TEST_CASE("Facade mt", "[facade][mt]") {
14591458
}
14601459
SECTION("logicFailStop") {
14611460
libclasp.ctx.solver(1)->addPost(new Blocker(ev, Blocker::logic));
1462-
REQUIRE_THROWS_AS(libclasp.solve(), std::logic_error);
1461+
REQUIRE_THROWS_AS(libclasp.solve(), std::domain_error);
14631462
}
14641463
}
14651464
}

0 commit comments

Comments
 (0)