Made functions inline to resolve multiple definition issues during co…

…mpilation and linking
RaulPPelaez · Jun 25, 2024 · e96f1a9 · e96f1a9
1 parent e3db524
commit e96f1a9
Show file tree

Hide file tree

Showing 11 changed files with 67 additions and 5 deletions.
diff --git a/src/Integrator/BDHI/BDHI_FCM.cu b/src/Integrator/BDHI/BDHI_FCM.cu
@@ -4,6 +4,7 @@
 #include"BDHI_FCM.cuh"
 namespace uammd{
   namespace BDHI{
+    inline
     auto FCMIntegrator::computeHydrodynamicDisplacements(){
       auto pos = pd->getPos(access::location::gpu, access::mode::read);
       auto force = pd->getForce(access::location::gpu, access::mode::read);
@@ -15,6 +16,7 @@ namespace uammd{
 						   temperature, 1.0/sqrt(dt), st);
     }
 
+    inline
     void FCMIntegrator::updateInteractors(){
       for(auto forceComp: interactors) forceComp->updateSimulationTime(steps*dt);
       if(steps==1){
@@ -26,6 +28,7 @@ namespace uammd{
       }
     }
 
+    inline
     void FCMIntegrator::resetForces(){
       int numberParticles = pg->getNumberParticles();
       auto force = pd->getForce(access::location::gpu, access::mode::write);
@@ -34,6 +37,7 @@ namespace uammd{
       CudaCheckError();
     }
 
+    inline
     void FCMIntegrator::resetTorques(){
       int numberParticles = pg->getNumberParticles();
       auto torque = pd->getTorque(access::location::gpu, access::mode::write);
@@ -42,6 +46,7 @@ namespace uammd{
       CudaCheckError();
     }
 
+    inline
     void FCMIntegrator::computeCurrentForces(){
       resetForces();
       if (pd->isDirAllocated()) resetTorques();
@@ -56,6 +61,7 @@ namespace uammd{
       /*With all the terms computed, update the positions*/
       /*T=0 case is templated*/
       template<class IndexIterator>
+      inline
       __global__ void integrateEulerMaruyamaD(real4* pos,
 					      real4* dir,
 					      IndexIterator indexIterator,
@@ -75,7 +81,7 @@ namespace uammd{
 	/*Write to global memory*/
 	pos[i] = make_real4(p,c);
 	/*Update the orientation*/
-	if(dir){ 
+	if(dir){
 	  Quat dirc = dir[i];
 	  //printf("W %f %f %f\n", angularV[id].x, angularV[id].y, angularV[id].z);
 	  //printf("V %f %f %f\n", linearV[id].x, linearV[id].y, linearV[id].z);
@@ -86,6 +92,7 @@ namespace uammd{
       }
     }
 
+    inline
     void FCMIntegrator::forwardTime(){
       steps++;
       sys->log<System::DEBUG1>("[BDHI::FCM] Performing integration step %d", steps);
@@ -97,12 +104,12 @@ namespace uammd{
       auto angularVelocities = disp.second;
       auto indexIter = pg->getIndexIterator(access::location::gpu);
       auto pos = pd->getPos(access::location::gpu, access::mode::readwrite);
-      auto dir = pd->getDirIfAllocated(access::location::gpu, access::mode::readwrite);      
+      auto dir = pd->getDirIfAllocated(access::location::gpu, access::mode::readwrite);
       real3* d_linearV = thrust::raw_pointer_cast(linearVelocities.data());
       real3* d_angularV = dir.raw()?thrust::raw_pointer_cast(angularVelocities.data()):nullptr;
       int BLOCKSIZE = 128; /*threads per block*/
       int nthreads = BLOCKSIZE<numberParticles?BLOCKSIZE:numberParticles;
-      int nblocks = numberParticles/nthreads + ((numberParticles%nthreads!=0)?1:0);      
+      int nblocks = numberParticles/nthreads + ((numberParticles%nthreads!=0)?1:0);
       FCM_ns::integrateEulerMaruyamaD<<<nblocks, nthreads, 0, st>>>(pos.raw(),
 								    dir.raw(),
 								    indexIter,

diff --git a/src/Integrator/BDHI/FCM/FCM_impl.cuh b/src/Integrator/BDHI/FCM/FCM_impl.cuh
@@ -224,6 +224,7 @@ namespace uammd{
       };
 
       template<class IterPos, class IterForce, class Kernel>
+      inline
       cached_vector<real3> spreadForces(IterPos& pos, IterForce& force,
 					int numberParticles,
 					std::shared_ptr<Kernel> kernel,
@@ -248,6 +249,7 @@ namespace uammd{
       };
 
       template<class T, class T3, class Container>
+      inline
       auto getCoordinateVector(Container &v, int coord){
 	cached_vector<T> v_a(v.size());
 	T* ptr= (T*)thrust::raw_pointer_cast(v.data());
@@ -264,14 +266,17 @@ namespace uammd{
 	  return {thrust::get<0>(a), thrust::get<1>(a),thrust::get<2>(a)};
 	}
       };
+
       template<class T3, class Container>
+      inline
       auto interleave(Container &a, Container &b, Container &c){
 	auto zip = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin(), c.begin()));
 	cached_vector<T3> res(a.size());
         thrust::transform(zip, zip+a.size(), res.begin(), Zip23());
 	return res;
       }
 
+      inline
       cached_vector<complex3> forwardTransform(cached_vector<real3>& gridReal,
 						    int3 n,
 						    cufftHandle plan, cudaStream_t st){
@@ -286,6 +291,7 @@ namespace uammd{
       }
 
 
+      inline
       __global__ void addTorqueCurl(complex3 *gridTorquesFourier, complex3* gridVelsFourier, Grid grid){
 	int id = blockDim.x*blockIdx.x + threadIdx.x;
 	const int3 nk = grid.cellDim;
@@ -307,6 +313,7 @@ namespace uammd{
 
 
       template<class IterPos, class IterTorque, class Kernel>
+      inline
       void addSpreadTorquesFourier(IterPos& pos, IterTorque& torque, int numberParticles,
 				   Grid grid,
 				   std::shared_ptr<Kernel> kernel,
@@ -339,6 +346,7 @@ namespace uammd{
 	 Output:gridVels = B·FFTf·S·F -> B \propto (I-k^k/|k|^2)
        */
       /*A thread per fourier node*/
+      inline
       __global__ void forceFourier2Vel(const complex3 * gridForces, /*Input array*/
 				       complex3 * gridVels, /*Output array, can be the same as input*/
 				       real vis,
@@ -361,6 +369,7 @@ namespace uammd{
         gridVels[id] = projectFourier(k2, dk, factor)*(B/real(ncells.x*ncells.y*ncells.z));
       }
 
+      inline
       void convolveFourier(cached_vector<complex3>& gridVelsFourier, real viscosity, Grid grid, cudaStream_t st){
 	System::log<System::DEBUG2>("[BDHI::FCM] Wave space velocity scaling");
 	/*Scale the wave space grid forces, transforming in velocities -> B·FFT·S·F*/
@@ -379,6 +388,7 @@ namespace uammd{
 	This kernel gets v_k = gridVelsFourier = B·FFtt·S·F as input and adds 1/√σ·√B(k)·dWw.
 	Keeping special care that v_k = v*_{N-k}, which implies that dWw_k = dWw*_{N-k}
       */
+      inline
       __global__ void fourierBrownianNoise(complex3 * gridVelsFourier,
 					   Grid grid,
 					   real prefactor,/* sqrt(2·T/dt)*/
@@ -451,6 +461,7 @@ namespace uammd{
 	}
 }
 
+      inline
       void addBrownianNoise(cached_vector<complex3>& gridVelsFourier,
 			    real temperature, real viscosity, real prefactor,
 			    uint seed,
@@ -487,6 +498,7 @@ namespace uammd{
 	}
       }
 
+      inline
       cached_vector<real3> inverseTransform(cached_vector<complex3>& gridFourier,
 					    int3 n, cufftHandle plan, cudaStream_t st){
 	int nx = 2*(n.x/2+1);
@@ -501,6 +513,7 @@ namespace uammd{
       }
 
       template<class IterPos, class Kernel>
+      inline
       cached_vector<real3> interpolateVelocity(IterPos& pos, cached_vector<real3>& gridVels,
 					       Grid grid, std::shared_ptr<Kernel> kernel,
 					       int numberParticles, cudaStream_t st){
@@ -524,6 +537,7 @@ namespace uammd{
       // = 0.5( i*k_y*V_z - i*k_z(V_y), i*k_z(V_x) - i*k_x*V_z, i*k_x*V_y - i*k_y*V_x)
       //Overwrite the output vector with the angular velocities in Fourier space
       //The input velocity vector is overwritten
+      inline
       __global__ void computeVelocityCurlFourier(const complex3 *gridVelsFourier,
 						 complex3* gridAngVelsFourier,
 						 Grid grid){
@@ -549,6 +563,7 @@ namespace uammd{
 	gridAngVelsFourier[id] = gridAng;
       }
 
+      inline
       cached_vector<complex3> computeGridAngularVelocityFourier(cached_vector<complex3>& gridVelsFourier,
 								Grid grid,  cudaStream_t st){
 	const int3 n = grid.cellDim;
@@ -565,6 +580,7 @@ namespace uammd{
       }
 
       template<class IterPos, class Kernel>
+      inline
       cached_vector<real3> interpolateAngularVelocity(IterPos& pos,
 						      cached_vector<real3>& gridAngVels,
 						      Grid grid,
@@ -584,6 +600,7 @@ namespace uammd{
     }
 
     template<class Kernel, class KernelTorque>
+    inline
     std::pair<cached_vector<real3>, cached_vector<real3>>
     FCM_impl<Kernel, KernelTorque>::computeHydrodynamicDisplacements(real4* pos,
 								     real4* force, real4* torque,

diff --git a/src/Integrator/BDHI/FCM/utils.cuh b/src/Integrator/BDHI/FCM/utils.cuh
@@ -22,6 +22,7 @@ namespace uammd{
       using complex = cufftComplex_t<real>;
       using complex3 = cufftComplex3_t<real>;
 
+      inline
       __device__ int3 indexToWaveNumber(int i, int3 nk){
 	int ikx = i%(nk.x/2+1);
 	int iky = (i/(nk.x/2+1))%nk.y;
@@ -32,10 +33,12 @@ namespace uammd{
 	return make_int3(ikx, iky, ikz);
       }
 
+      inline
       __device__ real3 waveNumberToWaveVector(int3 ik, real3 L){
 	return (real(2.0)*real(M_PI)/L)*make_real3(ik.x, ik.y, ik.z);
       }
 
+      inline
       __device__ real3 getGradientFourier(int3 ik, int3 nk, real3 L){
 	const bool isUnpairedX = ik.x == (nk.x - ik.x);
 	const bool isUnpairedY = ik.y == (nk.y - ik.y);
@@ -55,6 +58,7 @@ namespace uammd{
 	  unpaired modes set to zero
 	fr is the factor to project
       */
+      inline
       __device__ real3 projectFourier(real k2, real3 dk, real3 fr){
 	const real invk2 = real(1.0)/k2;
 	real3 vr = fr - dk*dot(fr, dk*invk2);
@@ -68,6 +72,7 @@ namespace uammd{
 	  unpaired modes set to zero
 	fr is the factor to project
       */
+      inline
       __device__ complex3 projectFourier(real k2, real3 dk, complex3 factor){
 	real3 re = projectFourier(k2, dk, make_real3(factor.x.x, factor.y.x, factor.z.x));
 	real3 imag = projectFourier(k2, dk, make_real3(factor.x.y, factor.y.y, factor.z.y));
@@ -77,6 +82,7 @@ namespace uammd{
 
       /*Compute gaussian complex noise dW, std = prefactor -> ||z||^2 = <x^2>/sqrt(2)+<y^2>/sqrt(2) = prefactor*/
       /*A complex random number for each direction*/
+      inline
       __device__ complex3 generateNoise(real prefactor, uint id, uint seed1, uint seed2){	  //Uncomment to use uniform numbers instead of gaussian
 	Saru saru(id, seed1, seed2);
 	complex3 noise;
@@ -91,6 +97,7 @@ namespace uammd{
 	return noise;
       }
 
+      inline
       __device__ bool isNyquistWaveNumber(int3 cell, int3 ncells){
 	/*Beware of nyquist points! They only appear with even cell dimensions
 	  There are 8 nyquist points at most (cell=0,0,0 is excluded at the start of the kernel)

diff --git a/src/Integrator/VerletNVE.cu b/src/Integrator/VerletNVE.cu
@@ -30,6 +30,7 @@
 
 namespace uammd{
 
+  inline
   VerletNVE::VerletNVE(shared_ptr<ParticleGroup> pg, VerletNVE::Parameters par):
     Integrator(pg, "VerletNVE"),
     dt(par.dt), energy(par.energy), is2D(par.is2D),
@@ -56,6 +57,7 @@ namespace uammd{
     CudaSafeCall(cudaStreamCreate(&stream));
   }
 
+  inline
   VerletNVE::~VerletNVE(){
     cudaStreamDestroy(stream);
   }
@@ -88,6 +90,7 @@ namespace uammd{
       }
   }
 
+  inline
   void VerletNVE::initializeVelocities(){
     //In the first step, compute energy in the system
     //in order to adapt the initial kinetic energy to match the input total energy
@@ -131,6 +134,7 @@ namespace uammd{
   }
 
   template<int step>
+  inline
   void VerletNVE::callIntegrate(){
     int numberParticles = pg->getNumberParticles();
     int Nthreads = 128;
@@ -152,13 +156,15 @@ namespace uammd{
 								       numberParticles, dt, is2D);
   }
 
+  inline
   void VerletNVE::resetForces(){
     int numberParticles = pg->getNumberParticles();
     auto force = pd->getForce(access::location::gpu, access::mode::write);
     auto force_gr = pg->getPropertyIterator(force);
     thrust::fill(thrust::cuda::par.on(stream), force_gr, force_gr + numberParticles, real4());
   }
 
+  inline
   void VerletNVE::firstStepPreparation(){
     if(initVelocities){
       initializeVelocities();
@@ -173,6 +179,7 @@ namespace uammd{
   }
 
   //Move the particles in my group 1 dt in time.
+  inline
   void VerletNVE::forwardTime(){
     steps++;
     sys->log<System::DEBUG1>("[VerletNVE] Performing integration step %d", steps);
@@ -191,6 +198,7 @@ namespace uammd{
 
   namespace verletnve_ns{
     template<class VelocityIterator, class MassIterator, class EnergyIterator>
+    inline
     __global__ void sumKineticEnergy(const VelocityIterator vel,
 				     EnergyIterator energy,
 				     const MassIterator mass,
@@ -203,6 +211,7 @@ namespace uammd{
   };
 
 
+  inline
   real VerletNVE::sumEnergy(){
     int numberParticles = pg->getNumberParticles();
     auto vel_gr = pg->getPropertyIterator(pd->getVel(access::location::gpu, access::mode::read));

diff --git a/src/Interactor/Potential/DPD.cuh b/src/Interactor/Potential/DPD.cuh
@@ -168,11 +168,13 @@ namespace uammd{
     };
 
     template<>
+    inline
     void DPD_impl<DefaultDissipation>::printGamma(){
       System::log<System::MESSAGE>("[Potential::DPD] gamma: %f", gamma.gamma);
     }
 
     template<class T>
+    inline
     void DPD_impl<T>::printGamma(){
       System::log<System::MESSAGE>("[Potential::DPD] Using %s for dissipation", type_name<T>().c_str());
     }

diff --git a/src/misc/IBM_kernels.cuh b/src/misc/IBM_kernels.cuh
@@ -41,6 +41,7 @@ namespace uammd{
 
       //Sum all values in a container using Kahan Summation, which increases floating point accuracy
       template<class Container>
+      inline
       auto kahanSum(Container &v){
 	auto c = v[0]*0;
 	auto sum = c;
@@ -55,6 +56,7 @@ namespace uammd{
 
       //Integrate the function foo(x) from x=rmin to x=rmax using the Simpson rule with Nr intervals
       template<class Foo>
+      inline
       auto integrate(Foo foo, real rmin, real rmax, int Nr){
 	using T = decltype(foo(rmin));
 	if(Nr%2 == 1) Nr++; //Need an even number of points
@@ -74,6 +76,7 @@ namespace uammd{
     }
 
     //[1] Taken from https://arxiv.org/pdf/1712.04732.pdf
+    inline
     __host__ __device__ real BM(real zz, real alpha, real beta){
       const real z = zz/alpha;
       const real z2 = z*z;
@@ -114,7 +117,7 @@ namespace uammd{
 	const real invh;
 	static constexpr int support = 3;
 	threePoint(real h):invh(1.0/h){}
-	__host__ __device__ real phi(real rr, real3 pos = real3()) const{
+	inline __host__ __device__ real phi(real rr, real3 pos = real3()) const{
 	  const real r = fabs(rr)*invh;
 	  if(r<real(0.5)){
 	    constexpr real onediv3 = real(1/3.0);