Skip to content

Commit

Permalink
matrix_vector progress
Browse files Browse the repository at this point in the history
  • Loading branch information
lehner committed Oct 8, 2023
1 parent 36d7034 commit 0aa1bd7
Show file tree
Hide file tree
Showing 9 changed files with 132 additions and 29 deletions.
65 changes: 63 additions & 2 deletions benchmarks/stencil.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@
st = g.stencil.matrix(
U[0],
[(0, 0, 0, 0), (1, 0, 0, 0), (0, 1, 0, 0), (0, 0, 1, 0), (0, 0, 0, 1)],
[0],
[1, 2, 3, 4],
code,
)
# test plaquette
Expand Down Expand Up @@ -82,3 +80,66 @@
Total performance : {GFlopsPerSec:.2f} GFlops/s
Effective memory bandwidth : {GBPerSec:.2f} GB/s"""
)

# covariant laplacian stencil
src = g.vspincolor(grid)
rng.cnormal(src)
evec = [(1, 0, 0, 0), (0, 1, 0, 0), (0, 0, 1, 0), (0, 0, 0, 1)]
nevec = [tuple([-x for x in y]) for y in evec]
UdagShift = [g(g.adj(g.cshift(U[mu], mu, -1))) for mu in range(4)]
_U = [0,1,2,3]
_UdagShift = [4,5,6,7]
_X = 0
_Xp = [1,2,3,4]
_Xm = [5,6,7,8]
code = [(0,1,_X,-1,-8.0,[])]
for mu in range(4):
code.append((0,1,_Xp[mu], 0, 1.0,[(_U[mu], _X, 0)]))
code.append((0,1,_Xm[mu], 0, 1.0,[(_UdagShift[mu], _X, 0)]))
# can switch last line to next one
#code.append((0,1,_Xm[mu], 0, 1.0,[(_U[mu], _Xm[mu], 1)]))
st = g.stencil.matrix_vector(
U[0],
src,
[(0, 0, 0, 0)] + evec + nevec,
code
)
# test laplace
dst = g.lattice(src)
st(U + UdagShift, [dst,src])

lap = g.create.smear.laplace(g.covariant.shift(U, boundary_phases=[1]*4), [0,1,2,3])
dst2 = lap(src)
eps2 = g.norm2(dst - dst2) / g.norm2(dst)
assert eps2 ** 0.5 < precision.eps * 100

# Flops
gauge_otype = U[0].otype
Nc = gauge_otype.shape[0]
Ns = 4
flops_per_matrix_vector_multiply = Ns * (Nc**2 * 6 + (Nc - 1) * Nc * 2)
flops_per_vector_add = Ns * Nc * 2
flops_per_site = 8 * flops_per_matrix_vector_multiply + 8 * flops_per_vector_add
flops = flops_per_site * src.grid.gsites * N
nbytes = (8 * Nc * Nc * 2 + Nc * Ns * 2) * precision.nbytes * src.grid.gsites * N

# Warmup
for n in range(5):
st(U + UdagShift, [dst,src])

# Time
t0 = g.time()
for n in range(N):
st(U + UdagShift, [dst,src])
t1 = g.time()

# Report
GFlopsPerSec = flops / (t1 - t0) / 1e9
GBPerSec = nbytes / (t1 - t0) / 1e9
g.message(
f"""
{N} applications of laplace stencil
Time to complete : {t1-t0:.2f} s
Total performance : {GFlopsPerSec:.2f} GFlops/s
Effective memory bandwidth : {GBPerSec:.2f} GB/s"""
)
3 changes: 0 additions & 3 deletions lib/cgpt/lib/stencil/matrix_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,9 +275,6 @@ cgpt_stencil_matrix_vector_create(cgpt_Lattice_base* __matrix, GridBase* grid, P
std::vector<cgpt_stencil_matrix_vector_code_t> code;
cgpt_convert(_code,code);

// for now only local is implemented
ASSERT(local);

// test __matrix type against matrix in spin space,
// color space spin+color space, and singlet space
if (is_compatible<typename matrixFromTypeAtLevel<V,2>::type>(__matrix)) {
Expand Down
3 changes: 3 additions & 0 deletions lib/gpt/core/local_stencil/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,6 @@ def __call__(self, *fields):

def __del__(self):
cgpt.stencil_matrix_delete(self.obj)

def data_access_hints(self, *hints):
pass
7 changes: 5 additions & 2 deletions lib/gpt/core/local_stencil/matrix_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def parse(c):


class matrix_vector:
def __init__(self, lat_matrix, lat_vector, points, code, code_parallel_block_size=None):
def __init__(self, lat_matrix, lat_vector, points, code, code_parallel_block_size=None, local=1):
self.points = points
self.code = [parse(c) for c in code]
self.code_parallel_block_size = code_parallel_block_size
Expand All @@ -47,11 +47,14 @@ def __init__(self, lat_matrix, lat_vector, points, code, code_parallel_block_siz
points,
self.code,
code_parallel_block_size,
1
local
)

def __call__(self, matrix_fields, vector_fields):
cgpt.stencil_matrix_vector_execute(self.obj, matrix_fields, vector_fields)

def __del__(self):
cgpt.stencil_matrix_vector_delete(self.obj)

def data_access_hints(self, *hints):
pass
3 changes: 2 additions & 1 deletion lib/gpt/core/parallel_transport/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ def __init__(self, U, code, n_target):
write_fields = list(range(Ntarget))
read_fields = list(range(Ntarget + Ntemporary, Ntarget + Ntemporary + Nd))

self.stencil = g.stencil.matrix(U[0], points.points, write_fields, read_fields, self.code)
self.stencil = g.stencil.matrix(U[0], points.points, self.code)
self.stencil.data_access_hints(write_fields, read_fields, [])

def __call__(self, U):
T = [g.lattice(U[0]) for i in range(self.Ntarget)]
Expand Down
1 change: 1 addition & 0 deletions lib/gpt/core/stencil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
from gpt.core.stencil.matrix import matrix
from gpt.core.stencil.matrix_vector import matrix_vector
17 changes: 12 additions & 5 deletions lib/gpt/core/stencil/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@


class matrix_padded:
def __init__(self, lat, points, write_fields, read_fields, code, code_parallel_block_size=None):
def __init__(self, lat, points, code, code_parallel_block_size=None):
margin = [0] * lat.grid.nd
for p in points:
for i in range(lat.grid.nd):
Expand All @@ -32,11 +32,17 @@ def __init__(self, lat, points, write_fields, read_fields, code, code_parallel_b
self.local_stencil = g.local_stencil.matrix(
self.padding(lat), points, code, code_parallel_block_size
)
self.write_fields = write_fields
self.read_fields = read_fields
self.write_fields = None
self.verbose_performance = g.default.is_verbose("stencil_performance")

def data_access_hints(self, write_fields, read_fields, cache_fields):
self.write_fields = write_fields
self.read_fields = read_fields
self.cache_fields = cache_fields

Check failure on line 42 in lib/gpt/core/stencil/matrix.py

View workflow job for this annotation

GitHub Actions / lint

W293:blank line contains whitespace
def __call__(self, *fields):
if self.write_fields is None:
raise Exception("Generalized matrix stencil needs more information. Call stencil.data_access_hints.")
if self.verbose_performance:
t = g.timer("stencil.matrix")
t("create fields")
Expand All @@ -62,13 +68,14 @@ def __call__(self, *fields):
if self.verbose_performance:
t()
g.message(t)
# todo: make use of cache_fields

Check failure on line 72 in lib/gpt/core/stencil/matrix.py

View workflow job for this annotation

GitHub Actions / lint

W293:blank line contains whitespace

def matrix(lat, points, write_fields, read_fields, code, code_parallel_block_size=None):
def matrix(lat, points, code, code_parallel_block_size=None):
# check if all points are cartesian
for p in points:
if len([s for s in p if s != 0]) > 1:
return matrix_padded(
lat, points, write_fields, read_fields, code, code_parallel_block_size
lat, points, code, code_parallel_block_size
)
return g.local_stencil.matrix(lat, points, code, code_parallel_block_size, local=0)
27 changes: 27 additions & 0 deletions lib/gpt/core/stencil/matrix_vector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#
# GPT - Grid Python Toolkit
# Copyright (C) 2023 Christoph Lehner ([email protected], https://github.com/lehner/gpt)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
import gpt as g


def matrix_vector(lat_matrix, lat_vector, points, code, code_parallel_block_size=None):
# check if all points are cartesian
for p in points:
if len([s for s in p if s != 0]) > 1:
raise Exception("General stencil matrix_vector not yet implemented")
return g.local_stencil.matrix_vector(lat_matrix, lat_vector, points, code, code_parallel_block_size, local=0)
35 changes: 19 additions & 16 deletions tests/core/stencil.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,13 @@ def stencil_cshift(src, direction1, direction2):
stencil = g.stencil.matrix(
src,
[direction1, direction2, (0, 0, 0, 0)],
[0],
[1, 2],
[
{"target": 0, "accumulate": -1, "weight": 1.0, "factor": [(1, 0, 0)]},
{"target": 0, "accumulate": 0, "weight": 1.0, "factor": [(2, 1, 0)]},
{"target": 0, "accumulate": 0, "weight": 1.0, "factor": [(2, 2, 0)]},
],
)
stencil.data_access_hints([0], [1,2,3,4], [])
dst = g.lattice(src)
stencil(dst, src, src)
return dst
Expand Down Expand Up @@ -103,11 +102,25 @@ def stencil_cshift(src, direction1, direction2):

pval = 2 * g.sum(g.trace(Ps)).real / P.grid.gsites / 4 / 3 / 3

eps = abs(Pref - pval)
g.message(f"Stencil plaquette (local + padding): {pval} versus reference {Pref}: {eps}")
assert eps < 1e-14

stencil_plaquette = g.stencil.matrix(
P,
[(0, 0, 0, 0), (1, 0, 0, 0), (0, 1, 0, 0), (0, 0, 1, 0), (0, 0, 0, 1)],
code,
)

stencil_plaquette(Ps, *U)
pval = 2 * g.sum(g.trace(Ps)).real / P.grid.gsites / 4 / 3 / 3

eps = abs(Pref - pval)
g.message(f"Stencil plaquette: {pval} versus reference {Pref}: {eps}")
assert eps < 1e-14



# run again for benchmark:
# t = g.timer("test")
# t("halo exchange")
Expand Down Expand Up @@ -165,15 +178,9 @@ def stencil_cshift(src, direction1, direction2):
rng.cnormal(src)
cov = g.covariant.shift(U, boundary_phases=[1.0, 1.0, 1.0, 1.0])
for mu in range(4):
pad_U = g.padded_local_fields(U, list(evec[mu]))
pad_src = g.padded_local_fields(src, list(evec[mu]))

p_U = pad_U(U)
p_src = pad_src(src)

st = g.local_stencil.matrix_vector(
p_U[0],
p_src,
st = g.stencil.matrix_vector(
U[0],
src,
[(0, 0, 0, 0), evec[mu], nevec[mu]],
[
{
Expand Down Expand Up @@ -206,15 +213,11 @@ def stencil_cshift(src, direction1, direction2):
def lap(dst, src):
dst @= -2.0 * src + cov.forward[mu] * src + cov.backward[mu] * src

p_dst = g.lattice(p_src)
dst = g.lattice(src)

ref = g.lattice(src)
stv = g.lattice(src)

lap(ref, src)
st(p_U, [p_dst, p_src])
pad_src.extract(stv, p_dst)
st(U, [stv, src])

eps2 = g.norm2(stv - ref)
g.message(f"Stencil covariant laplace versus cshift version: {eps2}")
Expand Down

0 comments on commit 0aa1bd7

Please sign in to comment.