From 2d45954bbb26471eaf48c72d9a9f0a2dcd8536cc Mon Sep 17 00:00:00 2001 From: Giovanni Bussi Date: Thu, 16 May 2024 09:22:17 +0200 Subject: [PATCH 1/3] Fixed leaks in Subprocess Two leaks in Subprocess destructor: - one of the communication pipes was left open by mistake - the killed process was not waited for These could create problems for too many Subprocesses created (in a sequence), for instance by running multiple plumed objects which all uses the python selector, or similarly. --- src/tools/Subprocess.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/tools/Subprocess.cpp b/src/tools/Subprocess.cpp index 29c8312a20..5019d97fee 100644 --- a/src/tools/Subprocess.cpp +++ b/src/tools/Subprocess.cpp @@ -25,6 +25,7 @@ #ifdef __PLUMED_HAS_SUBPROCESS #include #include +#include #endif namespace PLMD { @@ -59,7 +60,11 @@ class SubprocessPid { } ~SubprocessPid() { // this is apparently working also with MPI on Travis. - if(pid!=0 && pid!=-1) kill(pid,SIGINT); + if(pid!=0 && pid!=-1) { + int status; + kill(pid,SIGINT); + waitpid(pid, &status, 0); // Wait for the child process to terminate + } } #endif }; @@ -113,11 +118,15 @@ Subprocess::Subprocess(const std::string & cmd) { Subprocess::~Subprocess() { #ifdef __PLUMED_HAS_SUBPROCESS -// fpc should be closed to terminate the child executable +// close files: fclose(fppc); + fclose(fpcp); +// fclose also closes the underlying descriptors, +// so this is not needed: close(fpc); -// fcp should not be closed because it could make the child executable fail -/// TODO: check if this is necessary and make this class exception safe! + close(fcp); +// after closing the communication, the subprocess is killed +// in pid's destructor #endif } From d48b97801d70184970c680db3197f3ad358526e3 Mon Sep 17 00:00:00 2001 From: Giovanni Bussi Date: Thu, 16 May 2024 09:23:53 +0200 Subject: [PATCH 2/3] Regtest --- regtest/basic/rt-make-subprocess/Makefile | 1 + regtest/basic/rt-make-subprocess/config | 2 ++ regtest/basic/rt-make-subprocess/main.cpp | 31 +++++++++++++++++++ .../should_be_empty.reference | 0 4 files changed, 34 insertions(+) create mode 100644 regtest/basic/rt-make-subprocess/Makefile create mode 100644 regtest/basic/rt-make-subprocess/config create mode 100644 regtest/basic/rt-make-subprocess/main.cpp create mode 100644 regtest/basic/rt-make-subprocess/should_be_empty.reference diff --git a/regtest/basic/rt-make-subprocess/Makefile b/regtest/basic/rt-make-subprocess/Makefile new file mode 100644 index 0000000000..3703b27cea --- /dev/null +++ b/regtest/basic/rt-make-subprocess/Makefile @@ -0,0 +1 @@ +include ../../scripts/test.make diff --git a/regtest/basic/rt-make-subprocess/config b/regtest/basic/rt-make-subprocess/config new file mode 100644 index 0000000000..095cfd3fdf --- /dev/null +++ b/regtest/basic/rt-make-subprocess/config @@ -0,0 +1,2 @@ +type=make +plumed_needs=subprocess diff --git a/regtest/basic/rt-make-subprocess/main.cpp b/regtest/basic/rt-make-subprocess/main.cpp new file mode 100644 index 0000000000..aa496a6b4c --- /dev/null +++ b/regtest/basic/rt-make-subprocess/main.cpp @@ -0,0 +1,31 @@ +#include "plumed/tools/Subprocess.h" +#include +#include + +using namespace PLMD; + +int main(){ + // if processes are not properly terminated and cleaned + // only a finite number of them will be allowed + + std::ofstream ofs("should_be_empty"); + + // the number of iteration required to trigger the bugs fixed in + // 2d45954bbb26471eaf48c72d9a9f0a2dcd8536cc + // are, on MacOS: + // ~ 250 for the missing close call + // ~ 2500 for the missing waitpid call + + // notice that we are not spawning simultaneous processes. + // they are called in a sequential way and killed (by destructor) + for(unsigned i=0;i<10000;i++) { + try { + Subprocess sp("yes"); + } catch(...) { + ofs<<"failed after "< Date: Thu, 16 May 2024 09:24:00 +0200 Subject: [PATCH 3/3] changelog --- CHANGES/v2.8.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES/v2.8.md b/CHANGES/v2.8.md index 89f72024cf..373904356c 100644 --- a/CHANGES/v2.8.md +++ b/CHANGES/v2.8.md @@ -113,4 +113,5 @@ Changes from version 2.7 which are relevant for users: - Small fix in Python, where we do not assume anymore that strings are null terminated. - Environment variables such as `PLUMED_INCLUDEDIR` and similar are sanitized before they are used for running commands. - Fixed a bug leading to a crash when using python selectors (e.g., `@mda:` or `@mdt:`) and multiple MPI processes per replica. +- Fixed leaks in Subprocess, potentially givin problems with a large number (>250) of sequentially created plumed objects all of them using `@mda:` or `@mdt:` selectors.