Skip to content

Commit

Permalink
- added Dmul() function to NL blas
Browse files Browse the repository at this point in the history
- finished NLCUDA backend for AMGCL (to be tested now)
  • Loading branch information
BrunoLevy committed Nov 17, 2024
1 parent c75963c commit fca4d8c
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 19 deletions.
64 changes: 47 additions & 17 deletions src/lib/geogram/NL/nl_amgcl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,11 +245,6 @@ namespace nlcuda_adapters {
NLMatrix impl_;
};

/**
* \brief a diagonal matrix, stored in device memory
*/
typedef matrix matrix_diagonal;

/**
* \brief a vector, stored in device memory
*/
Expand All @@ -273,9 +268,13 @@ namespace nlcuda_adapters {
~vector() {
if(data_ != nullptr) {
NL_DELETE_VECTOR(nlCUDABlas(), NL_DEVICE_MEMORY, n_, data_);
n_ = 0;
data_ = nullptr;
}
if(temp_ != nullptr) {
NL_DELETE_VECTOR(nlCUDABlas(), NL_DEVICE_MEMORY, n_, temp_);
}
n_ = 0;
data_ = nullptr;
temp_ = nullptr;
}

vector(const vector& rhs) = delete;
Expand Down Expand Up @@ -338,8 +337,11 @@ namespace nlcuda_adapters {

index_type n_;
double* data_;
mutable double* temp_; // temporary for vmul()
};

typedef vector matrix_diagonal;

/**
* Wrapper around solver::skyline_lu for use with the NLCUDA backend.
* Inspired from AMGCL cuda wrapper
Expand Down Expand Up @@ -556,32 +558,60 @@ namespace amgcl { namespace backend {

template <> struct vmul_impl<
double,
nlcuda_adapters::vector,
nlcuda_adapters::matrix_diagonal,
nlcuda_adapters::vector,
double,
nlcuda_adapters::vector
> {
static void apply(
double a,
const nlcuda_adapters::vector &x,
const nlcuda_adapters::matrix_diagonal &M,
const nlcuda_adapters::vector &y,
double b,
nlcuda_adapters::vector &z
) {
// TODO
nl_assert(false);
nl_assert(M.n_ == y.n_);
nl_assert(M.n_ == z.n_);
int N = M.n_;
NLBlas_t blas = nlCUDABlas();
// z <- a * M * y + b * z

if(b != 0.0) {
// tmp <- z
if(M.temp_ == nullptr) {
M.temp_ = NL_NEW_VECTOR(nlCUDABlas(), NL_DEVICE_MEMORY, N);
blas->Dcopy(blas,N,z.data_,1,M.temp_,1);
}
}

// z <- a * M * y
blas->Dmul(blas,N,M.data_,y.data_,z.data_);
if(a != 1.0) {
blas->Dscal(blas,N,a,y.data_,1);
}

if(b != 0.0) {
// z <- b * tmp + z
blas->Daxpy(blas,N,b,M.temp_,1,z.data_,1);
}
}
};

template <> struct copy_impl<
nlcuda_adapters::vector,
nlcuda_adapters::vector
> {
template <class V> struct copy_impl<V,nlcuda_adapters::vector> {
static void apply(
const nlcuda_adapters::vector &x,
const V &x,
nlcuda_adapters::vector &y
) {
y.copy_from(x);
y.copy_from_host(x.data());
}
};

template <class V> struct copy_impl<nlcuda_adapters::vector,V> {
static void apply(
const nlcuda_adapters::vector &x,
V &y
) {
x.copy_to_host(y.data());
}
};

Expand Down
14 changes: 13 additions & 1 deletion src/lib/geogram/NL/nl_blas.c
Original file line number Diff line number Diff line change
Expand Up @@ -1474,12 +1474,23 @@ static double host_blas_dnrm2(
}

static void host_blas_daxpy(
NLBlas_t blas, int n, double a, const double *x, int incx, double *y, int incy
NLBlas_t blas, int n,
double a, const double *x, int incx, double *y, int incy
) {
blas->flops += (NLulong)(2*n);
NL_FORTRAN_WRAP(daxpy)(&n,&a,(double*)x,&incx,y,&incy);
}

static void host_blas_dmul(
NLBlas_t blas, int n,
const double *x, const double *y, double* z
) {
blas->flops += (NLulong)(n);
for(int i=0; i<n; ++i) {
z[i] = x[i]*y[i];
}
}

static void host_blas_dscal(
NLBlas_t blas, int n, double a, double *x, int incx
) {
Expand Down Expand Up @@ -1530,6 +1541,7 @@ NLBlas_t nlHostBlas(void) {
blas.Ddot = host_blas_ddot;
blas.Dnrm2 = host_blas_dnrm2;
blas.Daxpy = host_blas_daxpy;
blas.Dmul = host_blas_dmul;
blas.Dscal = host_blas_dscal;
blas.Dgemv = host_blas_dgemv;
blas.Dtpsv = host_blas_dtpsv;
Expand Down
14 changes: 14 additions & 0 deletions src/lib/geogram/NL/nl_blas.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,19 @@ extern "C" {
);


/**
* \brief Component-wise multiplication
* \details In formula: \f$ z \leftarrow x ** y \f$
* \param[in] blas a handle to the BLAS abstraction layer
* \param[in] n dimension of the vectors
* \param[in] x , y source vectors
* \param[in] z destination vector
*/
typedef void (*FUNPTR_dmul)(
NLBlas_t blas, int n,
const double* x, const double* y, double* z
);

/**
* \brief Computes a matrix-vector product
* \details performs one of the matrix-vector operations
Expand Down Expand Up @@ -334,6 +347,7 @@ extern "C" {
FUNPTR_ddot Ddot;
FUNPTR_dnrm2 Dnrm2;
FUNPTR_daxpy Daxpy;
FUNPTR_dmul Dmul;
FUNPTR_dgemv Dgemv;
FUNPTR_dtpsv Dtpsv;

Expand Down
21 changes: 20 additions & 1 deletion src/lib/geogram/NL/nl_cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -1515,7 +1515,7 @@ static void nlDiagonalMatrixCUDAMult(
) {
int N = (int)Mcuda->n;
/*
* vector x vector slice-wise product implemented
* vector x vector component-wise product implemented
* using diagonal matrix x matrix function.
*/
nlCUDACheck(CUDA()->cublasDdgmm(
Expand Down Expand Up @@ -1667,6 +1667,24 @@ static void cuda_blas_daxpy(
CUDA()->cublasDaxpy(CUDA()->HNDL_cublas,n,&a,x,incx,y,incy);
}

static void cuda_blas_dmul(
NLBlas_t blas, int n,
const double *x, const double *y, double* z
) {
blas->flops += (NLulong)(n);
/*
* vector x vector component-wise product implemented
* using diagonal matrix x matrix function.
*/
nlCUDACheck(CUDA()->cublasDdgmm(
CUDA()->HNDL_cublas, CUBLAS_SIDE_LEFT,
n, 1,
x, n,
y, 1,
z, n
));
}

static void cuda_blas_dscal(
NLBlas_t blas, int n, double a, double *x, int incx
) {
Expand Down Expand Up @@ -1719,6 +1737,7 @@ NLBlas_t nlCUDABlas(void) {
blas.Ddot = cuda_blas_ddot;
blas.Dnrm2 = cuda_blas_dnrm2;
blas.Daxpy = cuda_blas_daxpy;
blas.Dmul = cuda_blas_dmul;
blas.Dscal = cuda_blas_dscal;
blas.Dgemv = cuda_blas_dgemv;
blas.Dtpsv = cuda_blas_dtpsv;
Expand Down

0 comments on commit fca4d8c

Please sign in to comment.