From 003521f17a1e440b4e227f5280fa6cf12db1827a Mon Sep 17 00:00:00 2001 From: Yuri Goldfeld Date: Mon, 18 Dec 2023 21:49:02 -0800 Subject: [PATCH 1/2] TSAN caught subtle error when exiting transport_test (exercise mode); worker loop should be stopped before things that tasks posted-onto it touch get destroyed in dtor. So doing that now. Assuming I got that right transport_test will finish fine with TSAN, in exercise mode, at least in heap sub-mode (SHM-classic and SHM-jemalloc sub-modes TBD). Also updating submodules (comment/spacing only changes). --- ipc_core | 2 +- ipc_session | 2 +- test/suite/transport_test/ex_cli.hpp | 8 ++++++++ test/suite/transport_test/ex_guy.cpp | 5 +++++ test/suite/transport_test/ex_guy.hpp | 1 + test/suite/transport_test/ex_srv.hpp | 13 +++++++++++++ 6 files changed, 29 insertions(+), 2 deletions(-) diff --git a/ipc_core b/ipc_core index 21c24880e..f278e699e 160000 --- a/ipc_core +++ b/ipc_core @@ -1 +1 @@ -Subproject commit 21c24880e3e4692355d30a53799fd493062fb256 +Subproject commit f278e699e8188251a1ec187e3285c2c9cbd4073b diff --git a/ipc_session b/ipc_session index 816e21721..4c5d8cc10 160000 --- a/ipc_session +++ b/ipc_session @@ -1 +1 @@ -Subproject commit 816e217213c38e5c9f75e0632754852413f3cc0e +Subproject commit 4c5d8cc10c205ebe1bfa4a41c32c4db174578ba9 diff --git a/test/suite/transport_test/ex_cli.hpp b/test/suite/transport_test/ex_cli.hpp index aac514bad..208327278 100644 --- a/test/suite/transport_test/ex_cli.hpp +++ b/test/suite/transport_test/ex_cli.hpp @@ -32,6 +32,7 @@ class Ex_cli : public Ex_guy using Session = Session_t; Ex_cli(flow::log::Logger* logger_ptr, flow::log::Logger* ipc_logger_ptr); + ~Ex_cli(); bool run(); @@ -151,6 +152,13 @@ CLASS::Ex_cli(flow::log::Logger* logger_ptr, flow::log::Logger* ipc_logger_ptr) // OK. } +TEMPLATE +CLASS::~Ex_cli() +{ + // See Ex_srv dtor. Same reasoning here. + Base::stop_worker(); +} + TEMPLATE bool CLASS::run() { diff --git a/test/suite/transport_test/ex_guy.cpp b/test/suite/transport_test/ex_guy.cpp index 7036d7474..53caf381a 100644 --- a/test/suite/transport_test/ex_guy.cpp +++ b/test/suite/transport_test/ex_guy.cpp @@ -72,6 +72,11 @@ void Ex_guy::done_and_done(bool ok) m_exit_promise.set_value(ok); } +void Ex_guy::stop_worker() +{ + m_loop.stop(); +} + flow::util::Task_engine* Ex_guy::task_engine() { return m_loop.task_engine().get(); diff --git a/test/suite/transport_test/ex_guy.hpp b/test/suite/transport_test/ex_guy.hpp index 8ad0c01a3..f885c1d4b 100644 --- a/test/suite/transport_test/ex_guy.hpp +++ b/test/suite/transport_test/ex_guy.hpp @@ -67,6 +67,7 @@ class Ex_guy : public flow::log::Log_context static Sptr make_sptr(Args&&... args); void done_and_done(bool ok); + void stop_worker(); template bool run(bool srv_else_cli, Task&& body); diff --git a/test/suite/transport_test/ex_srv.hpp b/test/suite/transport_test/ex_srv.hpp index d64f39a9d..bfd285ddd 100644 --- a/test/suite/transport_test/ex_srv.hpp +++ b/test/suite/transport_test/ex_srv.hpp @@ -36,6 +36,7 @@ class Ex_srv : public Ex_guy using Server = Server_t; Ex_srv(flow::log::Logger* logger_ptr, flow::log::Logger* ipc_logger_ptr); + ~Ex_srv(); bool run(); @@ -171,6 +172,18 @@ CLASS::Ex_srv(flow::log::Logger* logger_ptr, flow::log::Logger* ipc_logger_ptr) // Yeah. } +TEMPLATE +CLASS::~Ex_srv() +{ + /* A bit subtle but: Our base Ex_cli has the Single_thread_task_loop (thread W), where things are post()ed + * (directly or via async_wait() at least) to execute; those things will touch parts of *this. *this is + * about to be destroyed; so there is a race wherein last-second stuff can try to touch parts that are + * being destroyed at the same time (data race, caught by TSAN, yay!), or even after they are destroyed + * (use-after-free, which ASAN would've caught). At any rate this will synchronously stop thread W before + * such things can happen. */ + Base::stop_worker(); +} + TEMPLATE bool CLASS::run() { From 474dfd24a9a2572b2f4d39c1ca4746221c6e99d3 Mon Sep 17 00:00:00 2001 From: Yuri Goldfeld Date: Mon, 18 Dec 2023 22:06:57 -0800 Subject: [PATCH 2/2] transport_test is now TSAN-ready (with ~no suppressions) in exercise mode, heap sub-mode (TODO: working on the remaining sub-modes -- SHM-* -- presently). Incidentally I do not feel we need the skip-transport-test switches anymore. --- .github/workflows/main.yml | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d4cf1b94f..e619dcb82 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -231,7 +231,6 @@ jobs: Shm_pool_collection_test.Interface:\ Shm_pool_collection_test.Multiprocess:\ Shm_session_data_test.Multithread_object_database - skip-transport-tests-ex: true # TODO: Explain why briefly (even though there's prob a ticket). sanitizer-name: tsan no-lto: false # See the other *SAN; but for now I heed jkontrik's words to keep consistent with non-*SAN. - id: relwithdebinfo-msan @@ -679,7 +678,7 @@ jobs: # This follows the instructions in bin/transport_test/README.txt. - name: Prepare run script for [transport_test - Scripted mode] variations below if: | - (!cancelled()) && (!matrix.build-test-cfg.skip-transport-tests-sc) + !cancelled() run: | cat <<'EOF' > ${{ env.install-dir }}/bin/run_transport_test_sc.sh echo "Log level: [$1]." @@ -708,7 +707,7 @@ jobs: - name: Run integration test [transport_test - Scripted mode] id: transport_test_scripted if: | - (!cancelled()) && (!matrix.build-test-cfg.skip-transport-tests-sc) + !cancelled() run: | /usr/bin/bash -e \ ${{ env.install-dir }}/bin/run_transport_test_sc.sh \ @@ -729,7 +728,7 @@ jobs: - name: Prepare IPC-session safety-friendly run-time environment for [transport_test - Exercise mode] if: | - (!cancelled()) && (!matrix.build-test-cfg.skip-transport-tests-ex) + !cancelled() run: | mkdir -p ~/bin/ex_srv_run ~/bin/ex_cli_run mkdir -p /tmp/var/run @@ -739,7 +738,7 @@ jobs: - name: Prepare run script for [transport_test - Exercise mode] variations below if: | - (!cancelled()) && (!matrix.build-test-cfg.skip-transport-tests-ex) + !cancelled() run: | cat <<'EOF' > ${{ env.install-dir }}/bin/run_transport_test_ex.sh # Script created by pipeline during job. @@ -774,7 +773,7 @@ jobs: - name: Run integration test [transport_test - Exercise mode - Heap sub-mode] id: transport_test_ex_heap if: | - (!cancelled()) && (!matrix.build-test-cfg.skip-transport-tests-ex) + !cancelled() run: | /usr/bin/bash -e \ ${{ env.install-dir }}/bin/run_transport_test_ex.sh \ @@ -790,8 +789,8 @@ jobs: - name: Run integration test [transport_test - Exercise mode - SHM-classic sub-mode] id: transport_test_ex_shm_c - if: | - (!cancelled()) && (!matrix.build-test-cfg.skip-transport-tests-ex) + if: | # TODO: Skipping TSAN mode for this; currently unstable; working on it (ticket). + (!cancelled()) && (matrix.build-test-cfg.sanitizer-name != 'tsan') run: | /usr/bin/bash -e \ ${{ env.install-dir }}/bin/run_transport_test_ex.sh \ @@ -807,8 +806,8 @@ jobs: - name: Run integration test [transport_test - Exercise mode - SHM-jemalloc sub-mode] id: transport_test_ex_shm_j - if: | - (!cancelled()) && (!matrix.build-test-cfg.skip-transport-tests-ex) + if: | # TODO: Skipping TSAN mode for this; currently unstable; working on it (ticket). + (!cancelled()) && (matrix.build-test-cfg.sanitizer-name != 'tsan') run: | /usr/bin/bash -e \ ${{ env.install-dir }}/bin/run_transport_test_ex.sh \