diff --git a/src/Integrator/BDHI/DoublyPeriodic/DPStokesSlab.cuh b/src/Integrator/BDHI/DoublyPeriodic/DPStokesSlab.cuh index 41eebb73..fcb28bb4 100644 --- a/src/Integrator/BDHI/DoublyPeriodic/DPStokesSlab.cuh +++ b/src/Integrator/BDHI/DoublyPeriodic/DPStokesSlab.cuh @@ -18,6 +18,7 @@ #include #include "misc/LanczosAlgorithm.cuh" #include"StokesSlab/spreadInterp.cuh" +#include namespace uammd{ namespace DPStokesSlab_ns{ @@ -140,21 +141,23 @@ namespace uammd{ }; - struct SaruTransform{ + struct SaruTransform{ uint s1, s2; - SaruTransform(uint s1, uint s2):s1(s1), s2(s2){} + real std; + SaruTransform(uint s1, uint s2, real std = 1.0): + s1(s1), s2(s2), std(std){} __device__ real3 operator()(uint id){ Saru rng(s1, s2, id); - return make_real3(rng.gf(0.0f,1.0f), rng.gf(0.0f,1.0f).x); + return make_real3(rng.gf(0.0f,std), rng.gf(0.0f,std).x); } }; - auto fillRandomVectorReal3(int numberParticles, uint s1, uint s2){ + auto fillRandomVectorReal3(int numberParticles, uint s1, uint s2, real std = 1.0){ cached_vector noise(numberParticles); auto cit = thrust::make_counting_iterator(0); - auto tr = thrust::make_transform_iterator(cit, SaruTransform(s1, s2)); + auto tr = thrust::make_transform_iterator(cit, SaruTransform(s1, s2, std)); thrust::copy(thrust::cuda::par, tr, tr + numberParticles, noise.begin()); @@ -175,32 +178,28 @@ namespace uammd{ struct BDIntegrate{ real4* pos; real3* mf; - real3* sqrtmdwn; - real3* sqrtmdwnm1; - real3* mpw; - real3* mmw; - real rfdPrefactor; + real3* noise; + real3* noisePrev; + real3* rfd; real dt; - real noisePrefactor; BDIntegrate(real4* pos, real3* mf, - real3* sqrtmdwn, real3* sqrtmdwnm1, - real3* mpw, real3* mmw, real deltaRFD, + real3* noise, real3* noisePrev, + real3* rfd, real dt, real temperature): - pos(pos), mf(mf), sqrtmdwn(sqrtmdwn),sqrtmdwnm1(sqrtmdwnm1), - mpw(mpw), mmw(mmw), dt(dt){ - this->noisePrefactor = sqrt(2*temperature*dt); - this->rfdPrefactor = dt*temperature/deltaRFD; + pos(pos), mf(mf), noise(noise),noisePrev(noisePrev), + rfd(rfd), dt(dt){ } __device__ void operator()(int id){ - real3 fluct; - if(sqrtmdwnm1) - fluct = noisePrefactor*real(0.5)*(sqrtmdwn[id] + sqrtmdwnm1[id]); - else - fluct = noisePrefactor*sqrtmdwn[id]; - real3 displacement = mf[id]*dt + - fluct - +rfdPrefactor * (mpw[id] - mmw[id]); + real3 displacement = mf[id]*dt; + if(noise){ + real3 fluct; + if(noisePrev) + fluct = dt*real(0.5)*(noise[id] + noisePrev[id]); + else + fluct = dt*noise[id]; + displacement += fluct + rfd[id]; + } pos[id] += make_real4(displacement); } }; @@ -209,7 +208,7 @@ namespace uammd{ class DPStokesIntegrator: public Integrator{ int steps = 0; - uint seed; + uint seed, seedRFD; std::shared_ptr dpstokes; std::shared_ptr lanczos; thrust::device_vector previousNoise; @@ -228,9 +227,68 @@ namespace uammd{ System::log("[DPStokes] dt %g", par.dt); System::log("[DPStokes] temperature: %g", par.temperature); this->seed = pd->getSystem()->rng().next32(); + this->seedRFD = pd->getSystem()->rng().next32(); this->deltaRFD = 1e-6*par.hydrodynamicRadius; } + //Returns the thermal drift term: temperature*dt*(\partial_q \cdot M) + auto computeThermalDrift(){ + auto pos = pd->getPos(access::gpu, access::read); + const int numberParticles = pd->getNumParticles(); + if(par.temperature){ + auto noise2 = detail::fillRandomVectorReal3(numberParticles, seedRFD, steps); + auto cit = thrust::make_counting_iterator(0); + auto posp = thrust::make_transform_iterator(cit, + detail::SumPosAndNoise(pos.raw(), + noise2.data().get(), + deltaRFD*0.5)); + auto mpw = dpstokes->Mdot(posp, noise2.data().get(), numberParticles, 0); + auto posm = thrust::make_transform_iterator(cit, + detail::SumPosAndNoise(pos.raw(), + noise2.data().get(), + -deltaRFD*0.5)); + auto mmw = dpstokes->Mdot(posm, noise2.data().get(), numberParticles, 0); + using namespace thrust::placeholders; + thrust::transform(mpw.begin(), mpw.end(), + mmw.begin(), + mpw.begin(), + make_real3(par.dt*par.temperature/deltaRFD)*(_1 - _2)); + return mpw; + } + else{ + return cached_vector(); + } + } + + //Returns sqrt(2*M*temperature/dt)dW + auto computeFluctuations(){ + auto pos = pd->getPos(access::gpu, access::read); + const int numberParticles = pd->getNumParticles(); + cached_vector bdw(numberParticles); + thrust::fill(bdw.begin(), bdw.end(), real3()); + if(par.temperature){ + detail::LanczosAdaptor dot(dpstokes, pos.raw(), numberParticles); + auto noise = detail::fillRandomVectorReal3(numberParticles, seed, steps, + sqrt(2*par.temperature/par.dt)); + lanczos->run(dot, + (real*)bdw.data().get(), (real*)noise.data().get(), + par.tolerance, 3*numberParticles); + } + return bdw; + } + + //Returns the product of the forces and the mobility matrix, M F + auto computeDeterministicDisplacements(){ + auto pos = pd->getPos(access::gpu, access::read); + auto force = pd->getForce(access::gpu, access::read); + if(pd->isTorqueAllocated()){ + System::log("[DPStokes] Torques are not yet implemented"); + throw std::runtime_error("Operation not implemented"); + } + const int numberParticles = pd->getNumParticles(); + return dpstokes->Mdot(pos.raw(), force.raw(), numberParticles, 0); + } + void forwardTime(){ System::log("[DPStokes] Running step %d", steps); if(steps==0) setUpInteractors(); @@ -239,46 +297,34 @@ namespace uammd{ i->updateSimulationTime(par.dt*steps); i->sum({.force=true}); } - auto pos = pd->getPos(access::gpu, access::readwrite); - auto force = pd->getForce(access::gpu, access::read); - auto torque = pd->getTorqueIfAllocated(access::gpu, access::read); const int numberParticles = pd->getNumParticles(); - auto mf = dpstokes->Mdot(pos.raw(), force.raw(), numberParticles, 0); - if(torque.raw()){ - System::log("[DPStokes] Torques are not yet implemented"); - throw std::runtime_error("Operation not implemented"); + auto mf = computeDeterministicDisplacements(); + if(par.temperature){ + auto bdw = computeFluctuations(); + if(par.useLeimkuhler and previousNoise.size() != numberParticles){ + previousNoise.resize(numberParticles); + thrust::copy(bdw.begin(), bdw.end(), previousNoise.begin()); + } + auto rfd = computeThermalDrift(); + real3* d_prevNoise = par.useLeimkuhler?previousNoise.data().get():nullptr; + auto pos = pd->getPos(access::gpu, access::readwrite); + detail::BDIntegrate bd(pos.raw(), mf.data().get(), + bdw.data().get(), d_prevNoise, + rfd.data().get(), + par.dt, par.temperature); + auto cit = thrust::make_counting_iterator(0); + thrust::for_each_n(thrust::cuda::par, cit, numberParticles, bd); + if(par.useLeimkuhler) + thrust::copy(bdw.begin(), bdw.end(), previousNoise.begin()); } - cached_vector bdw(numberParticles); - thrust::fill(bdw.begin(), bdw.end(), real3()); - detail::LanczosAdaptor dot(dpstokes, pos.raw(), numberParticles); - auto noise = detail::fillRandomVectorReal3(numberParticles, seed, 2*steps); - lanczos->run(dot, - (real*)bdw.data().get(), (real*)noise.data().get(), - par.tolerance, 3*numberParticles); - if(par.useLeimkuhler and previousNoise.size() != numberParticles){ - previousNoise.resize(numberParticles); - thrust::copy(bdw.begin(), bdw.end(), previousNoise.begin()); + else{ + auto pos = pd->getPos(access::gpu, access::readwrite); + detail::BDIntegrate bd(pos.raw(), mf.data().get(), + nullptr, nullptr, nullptr, + par.dt, 0); + auto cit = thrust::make_counting_iterator(0); + thrust::for_each_n(thrust::cuda::par, cit, numberParticles, bd); } - auto noise2 = detail::fillRandomVectorReal3(numberParticles, seed, 2*steps+1); - auto cit = thrust::make_counting_iterator(0); - auto posp = thrust::make_transform_iterator(cit, - detail::SumPosAndNoise(pos.raw(), - noise2.data().get(), - deltaRFD*0.5)); - auto mpw = dpstokes->Mdot(posp, noise2.data().get(), numberParticles, 0); - auto posm = thrust::make_transform_iterator(cit, - detail::SumPosAndNoise(pos.raw(), - noise2.data().get(), - -deltaRFD*0.5)); - auto mmw = dpstokes->Mdot(posm, noise2.data().get(), numberParticles, 0); - real3* d_prevNoise = par.useLeimkuhler?previousNoise.data().get():nullptr; - detail::BDIntegrate bd(pos.raw(), mf.data().get(), - bdw.data().get(), d_prevNoise, - mpw.data().get(), mmw.data().get(), - deltaRFD, par.dt, par.temperature); - thrust::for_each_n(thrust::cuda::par, cit, numberParticles, bd); - if(par.useLeimkuhler) - thrust::copy(bdw.begin(), bdw.end(), previousNoise.begin()); steps++; } diff --git a/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/utils.cuh b/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/utils.cuh index 0204284b..259df793 100644 --- a/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/utils.cuh +++ b/src/Integrator/BDHI/DoublyPeriodic/StokesSlab/utils.cuh @@ -202,16 +202,16 @@ namespace uammd{ m_z.resize(newSize); } - void fillWithZero() const{ + void fillWithZero(){ thrust::fill(m_x.begin(), m_x.end(), T()); thrust::fill(m_y.begin(), m_y.end(), T()); thrust::fill(m_z.begin(), m_z.end(), T()); } using Iterator = T*; - Iterator x()const{return thrust::raw_pointer_cast(m_x.data());} - Iterator y()const{return thrust::raw_pointer_cast(m_y.data());} - Iterator z()const{return thrust::raw_pointer_cast(m_z.data());} + Iterator x()const{return (Iterator)m_x.data().get();} + Iterator y()const{return (Iterator)m_y.data().get();} + Iterator z()const{return (Iterator)m_z.data().get();} auto xyz() const{ auto zip = thrust::make_zip_iterator(thrust::make_tuple(x(), y(), z())); @@ -292,7 +292,7 @@ namespace uammd{ FluidPointers(){} template FluidPointers(const Container &pressure, const DataXYZ &vel): - pressure(thrust::raw_pointer_cast(pressure.data())), + pressure((T*)(pressure.data().get())), velocityX(vel.x()), velocityY(vel.y()), velocityZ(vel.z()){} T* pressure; typename DataXYZ::Iterator velocityX, velocityY, velocityZ; diff --git a/src/Integrator/BDHI/FCM/FCM_impl.cuh b/src/Integrator/BDHI/FCM/FCM_impl.cuh index 693a3c2a..781720d2 100644 --- a/src/Integrator/BDHI/FCM/FCM_impl.cuh +++ b/src/Integrator/BDHI/FCM/FCM_impl.cuh @@ -81,10 +81,6 @@ namespace uammd{ System::log("FCM_impl requires instances of the spreading kernels"); throw std::runtime_error("Invalid arguments"); } - if(par.cells.x <= 0){ - System::log("FCM_impl requires valid grid dimensions"); - throw std::runtime_error("Invalid arguments"); - } printMessages(par); initCuFFT(); CudaSafeCall(cudaDeviceSynchronize()); @@ -149,8 +145,10 @@ namespace uammd{ auto rh = this->getHydrodynamicRadius(); auto M0 = this->getSelfMobility(); System::log("[BDHI::FCM] Using kernel: %s", type_name().c_str()); - System::log("[BDHI::FCM] Closest possible hydrodynamic radius: %g", rh); - System::log("[BDHI::FCM] Self mobility: %g", (double)M0); + if(rh>0){ + System::log("[BDHI::FCM] Hydrodynamic radius: %g", rh); + System::log("[BDHI::FCM] Self mobility: %g", (double)M0); + } if(box.boxSize.x != box.boxSize.y || box.boxSize.y != box.boxSize.z || box.boxSize.x != box.boxSize.z){ System::log("[BDHI::FCM] Self mobility will be different for non cubic boxes!"); } diff --git a/test/BDHI/DPStokes/CMakeLists.txt b/test/BDHI/DPStokes/CMakeLists.txt index bd71b318..47911238 100644 --- a/test/BDHI/DPStokes/CMakeLists.txt +++ b/test/BDHI/DPStokes/CMakeLists.txt @@ -12,7 +12,7 @@ set(CMAKE_CUDA_STANDARD_REQUIRED ON) set(CMAKE_CUDA_SEPARABLE_COMPILATION OFF) set(CUDA_ARCHITECTURES OFF) -set(UAMMD_INCLUDE ../../../../uammd/src ../../../../uammd/src/third_party) +set(UAMMD_INCLUDE ../../../src ../../../src/third_party) include(FetchContent) diff --git a/test/BDHI/FCM/CMakeLists.txt b/test/BDHI/FCM/CMakeLists.txt new file mode 100644 index 00000000..0310083d --- /dev/null +++ b/test/BDHI/FCM/CMakeLists.txt @@ -0,0 +1,64 @@ +cmake_minimum_required(VERSION 3.14) +project(fcm_test) +enable_language(CUDA) +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/") +set(CMAKE_BUILD_TYPE Release) +#set( CMAKE_VERBOSE_MAKEFILE on ) +#add_compile_definitions(PUBLIC MAXLOGLEVEL=15) +#GoogleTest requires at least C++14 +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CUDA_STANDARD 14) +set(CMAKE_CUDA_STANDARD_REQUIRED ON) +set(CMAKE_CUDA_SEPARABLE_COMPILATION OFF) +set(CUDA_ARCHITECTURES OFF) + +set(UAMMD_INCLUDE ../../../src ../../../src/third_party) + + +include(FetchContent) +FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.12.1 +) +# For Windows: Prevent overriding the parent project's compiler/linker settings +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +FetchContent_MakeAvailable(googletest) +enable_testing() + + + +#set(BLA_VENDOR Intel10_64lp) +find_package(BLAS) +if(BLAS_FOUND) + message("mkl environment detected") + add_compile_definitions(PUBLIC USE_MKL) +else() + unset(BLA_VENDOR) + find_package(BLAS REQUIRED) + find_package(LAPACK REQUIRED) + find_package(LAPACKE REQUIRED) +endif() + +add_compile_definitions(PUBLIC DOUBLE_PRECISION) +include_directories(${UAMMD_INCLUDE}) +link_libraries(${LAPACK_LIBRARIES} ${LAPACKE_LIBRARIES} ${BLAS_LIBRARIES} ${CUDA_LIBRARY} cufft cublas cusolver) + +add_executable( + fcm_test + fcm_test.cu +) + +target_link_libraries( + fcm_test + GTest::gtest_main + GTest::gmock_main +) + +include(GoogleTest) +gtest_discover_tests(fcm_test) + +IF (CMAKE_BUILD_TYPE MATCHES "Debug") + set(CMAKE_CUDA_FLAGS "-g -G -lineinfo -src-in-ptx") + set(CMAKE_CXX_FLAGS "-g -Wall -Wextra -pedantic") +ENDIF() diff --git a/test/BDHI/FCM/fcm_test.cu b/test/BDHI/FCM/fcm_test.cu new file mode 100644 index 00000000..08438b0f --- /dev/null +++ b/test/BDHI/FCM/fcm_test.cu @@ -0,0 +1,132 @@ +/*Raul P. Pelaez 2022. Tests for the FCM algorithm and integrator + + */ +#include +#include +#include "gmock/gmock.h" +#include "Integrator/BDHI/BDHI_FCM.cuh" +#include "utils/container.h" +#include +#include +#include +#include + +using namespace uammd; + +using BDHI::FCM_impl; +using Kernel = BDHI::FCM_ns::Kernels::Gaussian; +using KernelTorque = BDHI::FCM_ns::Kernels::GaussianTorque; +//Different kernels can achieve different maximum accuracies. For +//instance, the Gaussian kernel can safely achieve 8 digits of +//accuracy in the self mobility, but the Peskin 3pt kernel will only +//give about 3. Place the maximum expected accuracy in this variable +//if you want to check a new kernel. +constexpr real expectedAccuracy = 1e-8; +//Although the Gaussian kernel should be able to achieve even more +//accuracy, beyond 8 digits the approximate solution for the periodic +//correction of the self mobility I am using starts to fail. + +template using cached_vector = uninitialized_cached_vector; + +auto initializeKernel(real tolerance, real3 L, int3 n){ + real h = std::min({L.x/n.x, L.y/n.y, L.z/n.z}); + auto kernel = std::make_shared(h, tolerance); + return kernel; +} + +auto initializeKernelTorque(real hydrodynamicRadius, real tolerance, real3 L, int3 n){ + real a = hydrodynamicRadius; + real width = a/(pow(6*sqrt(M_PI), 1/3.)); + real h = std::min({L.x/n.x, L.y/n.y, L.z/n.z}); + auto kernelTorque = std::make_shared(width, h, tolerance); + return kernelTorque; +} + +TEST(FCM_impl, CanBeCreated){ + using FCM = FCM_impl; + FCM::Parameters par; + real hydrodynamicRadius = 1; + real3 L = {128, 128, 128}; + par.temperature = 1; + par.viscosity = 1; + par.tolerance = 1e-3; + par.dt = 1; + par.box = Box(L); + par.cells = {128,128,128};//createGrid(L, hydrodynamicRadius, par. tolerance); + par.kernel = initializeKernel(par.tolerance, L, par.cells); + par.kernelTorque = initializeKernelTorque(hydrodynamicRadius, par.tolerance, L, par.cells); + auto fcm = std::make_shared(par); +} + +real selfMobility(real hydrodynamicRadius, real viscosity, real L){ + //O(a^8) accuracy. See Hashimoto 1959. + //With a Gaussian this expression has a minimum deviation from measuraments of 7e-7*rh at L=64*rh. + //The translational invariance of the hydrodynamic radius however decreases arbitrarily with the tolerance. + //Seems that this deviation decreases with L, so probably is due to the correction below missing something. + long double rh = hydrodynamicRadius; + long double a = rh/L; + long double a2= a*a; long double a3 = a2*a; + long double c = 2.83729747948061947666591710460773907l; + long double b = 0.19457l; + long double a6pref = 16.0l*M_PIl*M_PIl/45.0l + 630.0L*b*b; + return 1.0l/(6.0l*M_PIl*viscosity*rh)*(1.0l-c*a+(4.0l/3.0l)*M_PIl*a3-a6pref*a3*a3); +} + +//Check that the self mobility stays below tolerance at a series of random points inside the domain +// in every direction when pulling a particle +TEST(FCM_impl, SelfMobilityIsCorrectUpToTolerance){ + using FCM = FCM_impl; + FCM::Parameters par; + real hydrodynamicRadius = 1.012312; + par.viscosity = 1.12321; + par.tolerance = expectedAccuracy; + par.dt = 1; + real h = Kernel::adviseGridSize(hydrodynamicRadius, par.tolerance); + //Ensure the box size is a multiple of the grid size + //A box approximately 128*hydrodynamicRadius in size + real3 L = make_real3(96*h*ceil(hydrodynamicRadius/h)); + par.cells = make_int3(L/h); + par.box = Box(L); + //par.cells = createGrid(L, hydrodynamicRadius, par. tolerance); + par.kernel = initializeKernel(par.tolerance, L, par.cells); + par.kernelTorque = initializeKernelTorque(hydrodynamicRadius, par.tolerance, L, par.cells); + par.hydrodynamicRadius = hydrodynamicRadius; + auto fcm = std::make_shared(par); + int numberParticles = 1; + cached_vector pos(numberParticles); + real temperature = 0; + real prefactor = 0; + real4* torque = nullptr; + auto force = pos; + real3 m0; + int ntest = 20; + Saru rng(1234); + for(int j = 0; jcomputeHydrodynamicDisplacements(pos.data().get(), + force.data().get(), + torque, + numberParticles, temperature, prefactor, 0); + real3 dx = disp.first[0]; + ASSERT_THAT(dx.x, ::testing::DoubleNear(m0.x, par.tolerance)); + ASSERT_THAT(dx.y, ::testing::DoubleNear(m0.y, par.tolerance)); + ASSERT_THAT(dx.z, ::testing::DoubleNear(m0.z, par.tolerance)); + } + } +} diff --git a/test/misc/chebyshev/CMakeLists.txt b/test/misc/chebyshev/CMakeLists.txt index 18c85249..db458943 100644 --- a/test/misc/chebyshev/CMakeLists.txt +++ b/test/misc/chebyshev/CMakeLists.txt @@ -11,7 +11,7 @@ set(CMAKE_CUDA_STANDARD_REQUIRED ON) set(CMAKE_CUDA_SEPARABLE_COMPILATION OFF) set(CUDA_ARCHITECTURES OFF) -set(UAMMD_INCLUDE ../../../../uammd/src ../../../../uammd/src/third_party) +set(UAMMD_INCLUDE ../../../src ../../../src/third_party) include(FetchContent) FetchContent_Declare( diff --git a/test/misc/lanczos/CMakeLists.txt b/test/misc/lanczos/CMakeLists.txt index a50984ed..0663a620 100644 --- a/test/misc/lanczos/CMakeLists.txt +++ b/test/misc/lanczos/CMakeLists.txt @@ -12,7 +12,7 @@ set(CMAKE_CUDA_STANDARD_REQUIRED ON) set(CMAKE_CUDA_SEPARABLE_COMPILATION OFF) set(CUDA_ARCHITECTURES OFF) -set(UAMMD_INCLUDE ../../../../uammd/src ../../../../uammd/src/third_party) +set(UAMMD_INCLUDE ../../../src ../../../src/third_party) include(FetchContent) diff --git a/test/misc/lanczos/lanczos_test.cu b/test/misc/lanczos/lanczos_test.cu index 5d8c4181..11aba673 100644 --- a/test/misc/lanczos/lanczos_test.cu +++ b/test/misc/lanczos/lanczos_test.cu @@ -7,6 +7,7 @@ #include"misc/LanczosAlgorithm.cuh" #include #include +#include #include using namespace uammd; diff --git a/test/utils/CMakeLists.txt b/test/utils/CMakeLists.txt index 0db625cf..4e4a21eb 100644 --- a/test/utils/CMakeLists.txt +++ b/test/utils/CMakeLists.txt @@ -12,7 +12,7 @@ set(CMAKE_CUDA_STANDARD_REQUIRED ON) set(CMAKE_CUDA_SEPARABLE_COMPILATION OFF) set(CUDA_ARCHITECTURES OFF) -set(UAMMD_INCLUDE ../../../uammd/src ../../../uammd/src/third_party) +set(UAMMD_INCLUDE ../../src ../../src/third_party) include(FetchContent)