Skip to content

Commit

Permalink
Merge branch 'develop' into feature/sycl
Browse files Browse the repository at this point in the history
  • Loading branch information
jcosborn committed Nov 22, 2024
2 parents 6237fea + 9fa8615 commit 3c3d80a
Show file tree
Hide file tree
Showing 57 changed files with 2,160 additions and 433 deletions.
80 changes: 80 additions & 0 deletions include/blas_3d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#pragma once

#include <quda_internal.h>
#include <color_spinor_field.h>

namespace quda
{

namespace blas3d
{

// Local enum for the 3D copy type
enum class copyType { COPY_TO_3D, COPY_FROM_3D, SWAP_3D };

/**
@brief Extract / insert / swap a timeslice between a 4-d field and a 3-d field
@param[in] slice Which slice
@param[in] type Extracting a time slice (COPY_TO_3D) or
inserting a timeslice (COPY_FROM_3D) or swapping time slices
(SWAP_3D)
@param[in,out] x 3-d field
@param[in,out] y 4-d field
*/
void copy(int slice, copyType type, ColorSpinorField &x, ColorSpinorField &y);

/**
@brief Swap the slice in two given fields
@param[in] slice The slice we wish to swap in the fields
@param[in,out] x Field whose slice we wish to swap
@param[in,out] y Field whose slice we wish to swap
*/
void swap(int slice, ColorSpinorField &x, ColorSpinorField &y);

/**
@brief Compute a set of real-valued inner products <x, y>, where each inner
product is restricted to a timeslice.
@param[out] result Vector of spatial inner products
@param[in] x Left vector field
@param[in] y Right vector field
*/
void reDotProduct(std::vector<double> &result, const ColorSpinorField &x, const ColorSpinorField &y);

/**
@brief Compute a set of complex-valued inner products <x, y>, where each inner
product is restricted to a timeslice.
@param[out] result Vector of spatial inner products
@param[in] x Left vector field
@param[in] y Right vector field
*/
void cDotProduct(std::vector<Complex> &result, const ColorSpinorField &a, const ColorSpinorField &b);

/**
@brief Timeslice real-valued scaling of the field
@param[in] a Vector of scale factors (length = local temporary extent)
@param[in] x Field we we wish to scale
*/
void ax(std::vector<double> &a, ColorSpinorField &x);

/**
@brief Timeslice real-valued axpby computation
@param[in] a Vector of scale factors (length = local temporary extent)
@param[in] x Input field
@param[in] b Vector of scale factors (length = local temporary extent)
@param[in,out] y Field we are updating
*/
void axpby(const std::vector<double> &a, const ColorSpinorField &x, const std::vector<double> &b,
ColorSpinorField &y);

/**
@brief Timeslice complex-valued axpby computation
@param[in] a Vector of scale factors (length = local temporary extent)
@param[in] x Input field
@param[in] b Vector of scale factors (length = local temporary extent)
@param[in,out] y Field we are updating
*/
void caxpby(const std::vector<Complex> &a, const ColorSpinorField &x, const std::vector<Complex> &b,
ColorSpinorField &y);

} // namespace blas3d
} // namespace quda
34 changes: 34 additions & 0 deletions include/color_spinor_field.h
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,8 @@ namespace quda
//! for deflation etc.
if (is_composite) printfQuda("Number of elements = %d\n", composite_dim);
}

void change_dim(int D, int d) { x[D] = d; }
};

struct DslashConstant;
Expand Down Expand Up @@ -1054,6 +1056,38 @@ namespace quda
*/
void spinorDistanceReweight(ColorSpinorField &src, double alpha0, int t0);

/**
@brief Helper function for determining if the spin of the fields is the same.
@param[in] a Input field
@param[in] b Input field
@return If spin is unique return the number of spins
*/
inline int Spin_(const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b)
{
int nSpin = 0;
if (a.Nspin() == b.Nspin())
nSpin = a.Nspin();
else
errorQuda("Spin %d %d do not match (%s:%d in %s())", a.Nspin(), b.Nspin(), file, line, func);
return nSpin;
}

/**
@brief Helper function for determining if the spin of the fields is the same.
@param[in] a Input field
@param[in] b Input field
@param[in] args List of additional fields to check spin on
@return If spins is unique return the number of spins
*/
template <typename... Args>
inline int Spin_(const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b,
const Args &...args)
{
return Spin_(func, file, line, a, b) & Spin_(func, file, line, a, args...);
}

#define checkSpin(...) Spin_(__func__, __FILE__, __LINE__, __VA_ARGS__)

/**
@brief Helper function for determining if the preconditioning
type of the fields is the same.
Expand Down
22 changes: 14 additions & 8 deletions include/comm_key.h
Original file line number Diff line number Diff line change
@@ -1,27 +1,33 @@
#pragma once

#include <array.h>

namespace quda
{

struct CommKey {

static constexpr int n_dim = 4;
array<int, n_dim> key = {0, 0, 0, 0};

int array[n_dim] = {0, 0, 0, 0};
constexpr int product() { return key[0] * key[1] * key[2] * key[3]; }

constexpr inline int product() { return array[0] * array[1] * array[2] * array[3]; }
constexpr int &operator[](int d) { return key[d]; }

constexpr inline int &operator[](int d) { return array[d]; }
constexpr const int &operator[](int d) const { return key[d]; }

constexpr inline const int &operator[](int d) const { return array[d]; }
constexpr auto data() { return key.data; }

constexpr inline int *data() { return array; }
constexpr auto data() const { return key.data; }

constexpr inline const int *data() const { return array; }
constexpr bool is_valid() const { return (key[0] > 0) && (key[1] > 0) && (key[2] > 0) && (key[3] > 0); }

constexpr inline bool is_valid() const
bool operator==(const CommKey &other) const
{
return (array[0] > 0) && (array[1] > 0) && (array[2] > 0) && (array[3] > 0);
bool is_same = true;
for (auto i = 0; i < n_dim; i++)
if (key[i] != other.key[i]) is_same = false;
return is_same;
}
};

Expand Down
16 changes: 15 additions & 1 deletion include/comm_quda.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,26 @@ namespace quda
int comm_dim(int dim);

/**
Return the coording of this process in the dimension dim
Return the global number of processes in the dimension dim
@param dim Dimension which we are querying
@return Length of process dimensions
*/
int comm_dim_global(int dim);

/**
Return the coordinate of this process in the dimension dim
@param dim Dimension which we are querying
@return Coordinate of this process
*/
int comm_coord(int dim);

/**
Return the global coordinates of this process in the dimension dim
@param dim Dimension which we are querying
@return Coordinate of this process
*/
int comm_coord_global(int dim);

/**
* Declare a message handle for sending `nbytes` to the `rank` with `tag`.
*/
Expand Down
5 changes: 4 additions & 1 deletion include/dslash_quda.h
Original file line number Diff line number Diff line change
Expand Up @@ -773,9 +773,12 @@ namespace quda
@param[in] a Scale factor applied to derivative
@param[in] b Scale factor applied to aux field
@param[in] x Vector field we accumulate onto to
@param[in] parity Destination parity
@param[in] comm_override Override for which dimensions are partitioned
@param[in] profile The TimeProfile used for profiling the dslash
*/
void ApplyLaplace(cvector_ref<ColorSpinorField> &out, cvector_ref<const ColorSpinorField> &in, const GaugeField &U,
int dir, double a, double b, cvector_ref<const ColorSpinorField> &x, int parity, bool dagger,
int dir, double a, double b, cvector_ref<const ColorSpinorField> &x, int parity,
const int *comm_override, TimeProfile &profile);

/**
Expand Down
128 changes: 113 additions & 15 deletions include/eigensolve_quda.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ namespace quda
@param[in] out Output spinor
@param[in] in Input spinor
*/
double estimateChebyOpMax(ColorSpinorField &out, ColorSpinorField &in);
virtual double estimateChebyOpMax(ColorSpinorField &out, ColorSpinorField &in);

/**
@brief Orthogonalise input vectors r against
Expand Down Expand Up @@ -263,22 +263,10 @@ namespace quda
@brief Compute eigenvalues and their residiua
@param[in] mat Matrix operator
@param[in] evecs The eigenvectors
@param[in] evals The eigenvalues
@param[in,out] evals The eigenvalues
@param[in] size The number of eigenvalues to compute
*/
void computeEvals(std::vector<ColorSpinorField> &evecs, std::vector<Complex> &evals,
int size);

/**
@brief Compute eigenvalues and their residiua. This variant compute the number of converged eigenvalues.
@param[in] mat Matrix operator
@param[in] evecs The eigenvectors
@param[in] evals The eigenvalues
*/
void computeEvals(std::vector<ColorSpinorField> &evecs, std::vector<Complex> &evals)
{
computeEvals(evecs, evals, n_conv);
}
virtual void computeEvals(std::vector<ColorSpinorField> &evecs, std::vector<Complex> &evals, int size = 0);

/**
@brief Load and check eigenpairs from file
Expand Down Expand Up @@ -461,6 +449,116 @@ namespace quda
void computeBlockKeptRitz(std::vector<ColorSpinorField> &kSpace);
};

/**
@brief Thick Restarted Lanczos Method for 3D slices
*/
class TRLM3D : public EigenSolver
{
bool verbose_rank = false; /** Whether this rank is one that logs */

public:
/**
@brief Constructor for Thick Restarted Eigensolver class
@param eig_param The eigensolver parameters
@param mat The operator to solve
*/
TRLM3D(const DiracMatrix &mat, QudaEigParam *eig_param);

/**
@return Whether the solver is only for Hermitian systems
*/
virtual bool hermitian() override { return true; } /** TRLM3D is only for Hermitian systems */

// Variable size matrix (for the 3D problem)
std::vector<std::vector<double>> ritz_mat_3D;

// Arrays for 3D residua
std::vector<std::vector<double>> residua_3D;

// Array for convergence
std::vector<bool> converged_3D;
std::vector<bool> active_3D;
std::vector<int> iter_locked_3D;
std::vector<int> iter_keep_3D;
std::vector<int> iter_converged_3D;
std::vector<int> num_locked_3D;
std::vector<int> num_keep_3D;
std::vector<int> num_converged_3D;

// Tridiagonal/Arrow matrices, fixed size (for the 3D problem)
std::vector<std::vector<double>> alpha_3D;
std::vector<std::vector<double>> beta_3D;

// The orthogonal direction and size in the 3D problem
int ortho_dim;
int ortho_dim_size;

/**
@brief Compute eigenpairs
@param[in] kSpace Krylov vector space
@param[in] evals Computed eigenvalues
*/
void operator()(std::vector<ColorSpinorField> &kSpace, std::vector<Complex> &evals) override;

/**
@brief Lanczos step: extends the Krylov space.
@param[in] v Vector space
@param[in] j Index of vector being computed
*/
void lanczosStep3D(std::vector<ColorSpinorField> &v, int j);

/**
@brief Reorder the Krylov space by eigenvalue
@param[in] kSpace the Krylov space
*/
void reorder3D(std::vector<ColorSpinorField> &kSpace);

/**
@brief Get the eigendecomposition from the arrow matrix
*/
void eigensolveFromArrowMat3D();

/**
@brief Rotate the Ritz vectors using the arrow matrix eigendecomposition
@param[in] nKspace current Krylov space
*/
void computeKeptRitz3D(std::vector<ColorSpinorField> &kSpace);

/**
@brief Orthogonalise input vectors r against
vector space v using block-BLAS
@param[in] v Vector space
@param[in] r Vectors to be orthogonalised
@param[in] j Use vectors v[0:j]
@param[in] s array of
*/
void blockOrthogonalize3D(std::vector<ColorSpinorField> &v, std::vector<ColorSpinorField> &r, int j);

/**
@brief Check for an initial guess. If none present, populate with rands, then
orthonormalise
@param[in] kSpace The Krylov space vectors
*/
void prepareInitialGuess3D(std::vector<ColorSpinorField> &kSpace, int ortho_dim_size);

/**
@brief Estimate the spectral radius of the operator for the max value of the
Chebyshev polynomial
@param[in] mat Matrix operator
@param[in] out Output spinor
@param[in] in Input spinor
*/
double estimateChebyOpMax(ColorSpinorField &out, ColorSpinorField &in) override;

/**
@brief Compute eigenvalues and their residiua
@param[in] evecs The eigenvectors
@param[in,out] evals The eigenvalues
@param[in] size The number of eigenvalues to compute
*/
void computeEvals(std::vector<ColorSpinorField> &evecs, std::vector<Complex> &evals, int size = 0) override;
};

/**
@brief Implicitly Restarted Arnoldi Method.
*/
Expand Down
3 changes: 1 addition & 2 deletions include/enum_quda.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ typedef enum QudaInverterType_s {
typedef enum QudaEigType_s {
QUDA_EIG_TR_LANCZOS, // Thick restarted lanczos solver
QUDA_EIG_BLK_TR_LANCZOS, // Block Thick restarted lanczos solver
QUDA_EIG_TR_LANCZOS_3D, // Thick restarted lanczos solver for 3-d systems
QUDA_EIG_IR_ARNOLDI, // Implicitly Restarted Arnoldi solver
QUDA_EIG_BLK_IR_ARNOLDI, // Block Implicitly Restarted Arnoldi solver
QUDA_EIG_INVALID = QUDA_INVALID_ENUM
Expand Down Expand Up @@ -277,8 +278,6 @@ typedef enum QudaVerbosity_s {
QUDA_INVALID_VERBOSITY = QUDA_INVALID_ENUM
} QudaVerbosity;

typedef enum QudaTune_s { QUDA_TUNE_NO, QUDA_TUNE_YES, QUDA_TUNE_INVALID = QUDA_INVALID_ENUM } QudaTune;

typedef enum QudaPreserveDirac_s {
QUDA_PRESERVE_DIRAC_NO,
QUDA_PRESERVE_DIRAC_YES,
Expand Down
Loading

0 comments on commit 3c3d80a

Please sign in to comment.