Skip to content

Commit

Permalink
Made functions inline to resolve multiple definition issues during co…
Browse files Browse the repository at this point in the history
…mpilation and linking
  • Loading branch information
PabloIbannez committed Jun 25, 2024
1 parent e3db524 commit e96f1a9
Show file tree
Hide file tree
Showing 11 changed files with 67 additions and 5 deletions.
13 changes: 10 additions & 3 deletions src/Integrator/BDHI/BDHI_FCM.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include"BDHI_FCM.cuh"
namespace uammd{
namespace BDHI{
inline
auto FCMIntegrator::computeHydrodynamicDisplacements(){
auto pos = pd->getPos(access::location::gpu, access::mode::read);
auto force = pd->getForce(access::location::gpu, access::mode::read);
Expand All @@ -15,6 +16,7 @@ namespace uammd{
temperature, 1.0/sqrt(dt), st);
}

inline
void FCMIntegrator::updateInteractors(){
for(auto forceComp: interactors) forceComp->updateSimulationTime(steps*dt);
if(steps==1){
Expand All @@ -26,6 +28,7 @@ namespace uammd{
}
}

inline
void FCMIntegrator::resetForces(){
int numberParticles = pg->getNumberParticles();
auto force = pd->getForce(access::location::gpu, access::mode::write);
Expand All @@ -34,6 +37,7 @@ namespace uammd{
CudaCheckError();
}

inline
void FCMIntegrator::resetTorques(){
int numberParticles = pg->getNumberParticles();
auto torque = pd->getTorque(access::location::gpu, access::mode::write);
Expand All @@ -42,6 +46,7 @@ namespace uammd{
CudaCheckError();
}

inline
void FCMIntegrator::computeCurrentForces(){
resetForces();
if (pd->isDirAllocated()) resetTorques();
Expand All @@ -56,6 +61,7 @@ namespace uammd{
/*With all the terms computed, update the positions*/
/*T=0 case is templated*/
template<class IndexIterator>
inline
__global__ void integrateEulerMaruyamaD(real4* pos,
real4* dir,
IndexIterator indexIterator,
Expand All @@ -75,7 +81,7 @@ namespace uammd{
/*Write to global memory*/
pos[i] = make_real4(p,c);
/*Update the orientation*/
if(dir){
if(dir){
Quat dirc = dir[i];
//printf("W %f %f %f\n", angularV[id].x, angularV[id].y, angularV[id].z);
//printf("V %f %f %f\n", linearV[id].x, linearV[id].y, linearV[id].z);
Expand All @@ -86,6 +92,7 @@ namespace uammd{
}
}

inline
void FCMIntegrator::forwardTime(){
steps++;
sys->log<System::DEBUG1>("[BDHI::FCM] Performing integration step %d", steps);
Expand All @@ -97,12 +104,12 @@ namespace uammd{
auto angularVelocities = disp.second;
auto indexIter = pg->getIndexIterator(access::location::gpu);
auto pos = pd->getPos(access::location::gpu, access::mode::readwrite);
auto dir = pd->getDirIfAllocated(access::location::gpu, access::mode::readwrite);
auto dir = pd->getDirIfAllocated(access::location::gpu, access::mode::readwrite);
real3* d_linearV = thrust::raw_pointer_cast(linearVelocities.data());
real3* d_angularV = dir.raw()?thrust::raw_pointer_cast(angularVelocities.data()):nullptr;
int BLOCKSIZE = 128; /*threads per block*/
int nthreads = BLOCKSIZE<numberParticles?BLOCKSIZE:numberParticles;
int nblocks = numberParticles/nthreads + ((numberParticles%nthreads!=0)?1:0);
int nblocks = numberParticles/nthreads + ((numberParticles%nthreads!=0)?1:0);
FCM_ns::integrateEulerMaruyamaD<<<nblocks, nthreads, 0, st>>>(pos.raw(),
dir.raw(),
indexIter,
Expand Down
17 changes: 17 additions & 0 deletions src/Integrator/BDHI/FCM/FCM_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ namespace uammd{
};

template<class IterPos, class IterForce, class Kernel>
inline
cached_vector<real3> spreadForces(IterPos& pos, IterForce& force,
int numberParticles,
std::shared_ptr<Kernel> kernel,
Expand All @@ -248,6 +249,7 @@ namespace uammd{
};

template<class T, class T3, class Container>
inline
auto getCoordinateVector(Container &v, int coord){
cached_vector<T> v_a(v.size());
T* ptr= (T*)thrust::raw_pointer_cast(v.data());
Expand All @@ -264,14 +266,17 @@ namespace uammd{
return {thrust::get<0>(a), thrust::get<1>(a),thrust::get<2>(a)};
}
};

template<class T3, class Container>
inline
auto interleave(Container &a, Container &b, Container &c){
auto zip = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin(), c.begin()));
cached_vector<T3> res(a.size());
thrust::transform(zip, zip+a.size(), res.begin(), Zip23());
return res;
}

inline
cached_vector<complex3> forwardTransform(cached_vector<real3>& gridReal,
int3 n,
cufftHandle plan, cudaStream_t st){
Expand All @@ -286,6 +291,7 @@ namespace uammd{
}


inline
__global__ void addTorqueCurl(complex3 *gridTorquesFourier, complex3* gridVelsFourier, Grid grid){
int id = blockDim.x*blockIdx.x + threadIdx.x;
const int3 nk = grid.cellDim;
Expand All @@ -307,6 +313,7 @@ namespace uammd{


template<class IterPos, class IterTorque, class Kernel>
inline
void addSpreadTorquesFourier(IterPos& pos, IterTorque& torque, int numberParticles,
Grid grid,
std::shared_ptr<Kernel> kernel,
Expand Down Expand Up @@ -339,6 +346,7 @@ namespace uammd{
Output:gridVels = B·FFTf·S·F -> B \propto (I-k^k/|k|^2)
*/
/*A thread per fourier node*/
inline
__global__ void forceFourier2Vel(const complex3 * gridForces, /*Input array*/
complex3 * gridVels, /*Output array, can be the same as input*/
real vis,
Expand All @@ -361,6 +369,7 @@ namespace uammd{
gridVels[id] = projectFourier(k2, dk, factor)*(B/real(ncells.x*ncells.y*ncells.z));
}

inline
void convolveFourier(cached_vector<complex3>& gridVelsFourier, real viscosity, Grid grid, cudaStream_t st){
System::log<System::DEBUG2>("[BDHI::FCM] Wave space velocity scaling");
/*Scale the wave space grid forces, transforming in velocities -> B·FFT·S·F*/
Expand All @@ -379,6 +388,7 @@ namespace uammd{
This kernel gets v_k = gridVelsFourier = B·FFtt·S·F as input and adds 1/√σ·√B(k)·dWw.
Keeping special care that v_k = v*_{N-k}, which implies that dWw_k = dWw*_{N-k}
*/
inline
__global__ void fourierBrownianNoise(complex3 * gridVelsFourier,
Grid grid,
real prefactor,/* sqrt(2·T/dt)*/
Expand Down Expand Up @@ -451,6 +461,7 @@ namespace uammd{
}
}

inline
void addBrownianNoise(cached_vector<complex3>& gridVelsFourier,
real temperature, real viscosity, real prefactor,
uint seed,
Expand Down Expand Up @@ -487,6 +498,7 @@ namespace uammd{
}
}

inline
cached_vector<real3> inverseTransform(cached_vector<complex3>& gridFourier,
int3 n, cufftHandle plan, cudaStream_t st){
int nx = 2*(n.x/2+1);
Expand All @@ -501,6 +513,7 @@ namespace uammd{
}

template<class IterPos, class Kernel>
inline
cached_vector<real3> interpolateVelocity(IterPos& pos, cached_vector<real3>& gridVels,
Grid grid, std::shared_ptr<Kernel> kernel,
int numberParticles, cudaStream_t st){
Expand All @@ -524,6 +537,7 @@ namespace uammd{
// = 0.5( i*k_y*V_z - i*k_z(V_y), i*k_z(V_x) - i*k_x*V_z, i*k_x*V_y - i*k_y*V_x)
//Overwrite the output vector with the angular velocities in Fourier space
//The input velocity vector is overwritten
inline
__global__ void computeVelocityCurlFourier(const complex3 *gridVelsFourier,
complex3* gridAngVelsFourier,
Grid grid){
Expand All @@ -549,6 +563,7 @@ namespace uammd{
gridAngVelsFourier[id] = gridAng;
}

inline
cached_vector<complex3> computeGridAngularVelocityFourier(cached_vector<complex3>& gridVelsFourier,
Grid grid, cudaStream_t st){
const int3 n = grid.cellDim;
Expand All @@ -565,6 +580,7 @@ namespace uammd{
}

template<class IterPos, class Kernel>
inline
cached_vector<real3> interpolateAngularVelocity(IterPos& pos,
cached_vector<real3>& gridAngVels,
Grid grid,
Expand All @@ -584,6 +600,7 @@ namespace uammd{
}

template<class Kernel, class KernelTorque>
inline
std::pair<cached_vector<real3>, cached_vector<real3>>
FCM_impl<Kernel, KernelTorque>::computeHydrodynamicDisplacements(real4* pos,
real4* force, real4* torque,
Expand Down
7 changes: 7 additions & 0 deletions src/Integrator/BDHI/FCM/utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ namespace uammd{
using complex = cufftComplex_t<real>;
using complex3 = cufftComplex3_t<real>;

inline
__device__ int3 indexToWaveNumber(int i, int3 nk){
int ikx = i%(nk.x/2+1);
int iky = (i/(nk.x/2+1))%nk.y;
Expand All @@ -32,10 +33,12 @@ namespace uammd{
return make_int3(ikx, iky, ikz);
}

inline
__device__ real3 waveNumberToWaveVector(int3 ik, real3 L){
return (real(2.0)*real(M_PI)/L)*make_real3(ik.x, ik.y, ik.z);
}

inline
__device__ real3 getGradientFourier(int3 ik, int3 nk, real3 L){
const bool isUnpairedX = ik.x == (nk.x - ik.x);
const bool isUnpairedY = ik.y == (nk.y - ik.y);
Expand All @@ -55,6 +58,7 @@ namespace uammd{
unpaired modes set to zero
fr is the factor to project
*/
inline
__device__ real3 projectFourier(real k2, real3 dk, real3 fr){
const real invk2 = real(1.0)/k2;
real3 vr = fr - dk*dot(fr, dk*invk2);
Expand All @@ -68,6 +72,7 @@ namespace uammd{
unpaired modes set to zero
fr is the factor to project
*/
inline
__device__ complex3 projectFourier(real k2, real3 dk, complex3 factor){
real3 re = projectFourier(k2, dk, make_real3(factor.x.x, factor.y.x, factor.z.x));
real3 imag = projectFourier(k2, dk, make_real3(factor.x.y, factor.y.y, factor.z.y));
Expand All @@ -77,6 +82,7 @@ namespace uammd{

/*Compute gaussian complex noise dW, std = prefactor -> ||z||^2 = <x^2>/sqrt(2)+<y^2>/sqrt(2) = prefactor*/
/*A complex random number for each direction*/
inline
__device__ complex3 generateNoise(real prefactor, uint id, uint seed1, uint seed2){ //Uncomment to use uniform numbers instead of gaussian
Saru saru(id, seed1, seed2);
complex3 noise;
Expand All @@ -91,6 +97,7 @@ namespace uammd{
return noise;
}

inline
__device__ bool isNyquistWaveNumber(int3 cell, int3 ncells){
/*Beware of nyquist points! They only appear with even cell dimensions
There are 8 nyquist points at most (cell=0,0,0 is excluded at the start of the kernel)
Expand Down
9 changes: 9 additions & 0 deletions src/Integrator/VerletNVE.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

namespace uammd{

inline
VerletNVE::VerletNVE(shared_ptr<ParticleGroup> pg, VerletNVE::Parameters par):
Integrator(pg, "VerletNVE"),
dt(par.dt), energy(par.energy), is2D(par.is2D),
Expand All @@ -56,6 +57,7 @@ namespace uammd{
CudaSafeCall(cudaStreamCreate(&stream));
}

inline
VerletNVE::~VerletNVE(){
cudaStreamDestroy(stream);
}
Expand Down Expand Up @@ -88,6 +90,7 @@ namespace uammd{
}
}

inline
void VerletNVE::initializeVelocities(){
//In the first step, compute energy in the system
//in order to adapt the initial kinetic energy to match the input total energy
Expand Down Expand Up @@ -131,6 +134,7 @@ namespace uammd{
}

template<int step>
inline
void VerletNVE::callIntegrate(){
int numberParticles = pg->getNumberParticles();
int Nthreads = 128;
Expand All @@ -152,13 +156,15 @@ namespace uammd{
numberParticles, dt, is2D);
}

inline
void VerletNVE::resetForces(){
int numberParticles = pg->getNumberParticles();
auto force = pd->getForce(access::location::gpu, access::mode::write);
auto force_gr = pg->getPropertyIterator(force);
thrust::fill(thrust::cuda::par.on(stream), force_gr, force_gr + numberParticles, real4());
}

inline
void VerletNVE::firstStepPreparation(){
if(initVelocities){
initializeVelocities();
Expand All @@ -173,6 +179,7 @@ namespace uammd{
}

//Move the particles in my group 1 dt in time.
inline
void VerletNVE::forwardTime(){
steps++;
sys->log<System::DEBUG1>("[VerletNVE] Performing integration step %d", steps);
Expand All @@ -191,6 +198,7 @@ namespace uammd{

namespace verletnve_ns{
template<class VelocityIterator, class MassIterator, class EnergyIterator>
inline
__global__ void sumKineticEnergy(const VelocityIterator vel,
EnergyIterator energy,
const MassIterator mass,
Expand All @@ -203,6 +211,7 @@ namespace uammd{
};


inline
real VerletNVE::sumEnergy(){
int numberParticles = pg->getNumberParticles();
auto vel_gr = pg->getPropertyIterator(pd->getVel(access::location::gpu, access::mode::read));
Expand Down
2 changes: 2 additions & 0 deletions src/Interactor/Potential/DPD.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -168,11 +168,13 @@ namespace uammd{
};

template<>
inline
void DPD_impl<DefaultDissipation>::printGamma(){
System::log<System::MESSAGE>("[Potential::DPD] gamma: %f", gamma.gamma);
}

template<class T>
inline
void DPD_impl<T>::printGamma(){
System::log<System::MESSAGE>("[Potential::DPD] Using %s for dissipation", type_name<T>().c_str());
}
Expand Down
5 changes: 4 additions & 1 deletion src/misc/IBM_kernels.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ namespace uammd{

//Sum all values in a container using Kahan Summation, which increases floating point accuracy
template<class Container>
inline
auto kahanSum(Container &v){
auto c = v[0]*0;
auto sum = c;
Expand All @@ -55,6 +56,7 @@ namespace uammd{

//Integrate the function foo(x) from x=rmin to x=rmax using the Simpson rule with Nr intervals
template<class Foo>
inline
auto integrate(Foo foo, real rmin, real rmax, int Nr){
using T = decltype(foo(rmin));
if(Nr%2 == 1) Nr++; //Need an even number of points
Expand All @@ -74,6 +76,7 @@ namespace uammd{
}

//[1] Taken from https://arxiv.org/pdf/1712.04732.pdf
inline
__host__ __device__ real BM(real zz, real alpha, real beta){
const real z = zz/alpha;
const real z2 = z*z;
Expand Down Expand Up @@ -114,7 +117,7 @@ namespace uammd{
const real invh;
static constexpr int support = 3;
threePoint(real h):invh(1.0/h){}
__host__ __device__ real phi(real rr, real3 pos = real3()) const{
inline __host__ __device__ real phi(real rr, real3 pos = real3()) const{
const real r = fabs(rr)*invh;
if(r<real(0.5)){
constexpr real onediv3 = real(1/3.0);
Expand Down
Loading

0 comments on commit e96f1a9

Please sign in to comment.