-
Notifications
You must be signed in to change notification settings - Fork 61
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
fc7de3f
commit 9896c7d
Showing
14 changed files
with
2,827 additions
and
482 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
diff --git a/backends/ref/ceed-ref-restriction.c b/backends/ref/ceed-ref-restriction.c | ||
index 44e9081e..09bdb6fd 100644 | ||
--- a/backends/ref/ceed-ref-restriction.c | ||
+++ b/backends/ref/ceed-ref-restriction.c | ||
@@ -233,7 +233,7 @@ static inline int CeedElemRestrictionApplyOffsetTranspose_Ref_Core(CeedElemRestr | ||
CeedScalar vv_loc; | ||
|
||
vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset]; | ||
- CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; | ||
+ vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; | ||
} | ||
} | ||
} | ||
@@ -257,7 +257,7 @@ static inline int CeedElemRestrictionApplyOrientedTranspose_Ref_Core(CeedElemRes | ||
CeedScalar vv_loc; | ||
|
||
vv_loc = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0); | ||
- CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; | ||
+ vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; | ||
} | ||
} | ||
} | ||
@@ -287,7 +287,7 @@ static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedEle | ||
impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; | ||
} | ||
for (CeedInt j = 0; j < block_end; j++) { | ||
- CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; | ||
+ vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; | ||
} | ||
for (n = 1; n < elem_size - 1; n++) { | ||
CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { | ||
@@ -299,7 +299,7 @@ static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedEle | ||
impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; | ||
} | ||
for (CeedInt j = 0; j < block_end; j++) { | ||
- CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; | ||
+ vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; | ||
} | ||
} | ||
CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { | ||
@@ -309,7 +309,7 @@ static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedEle | ||
impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; | ||
} | ||
for (CeedInt j = 0; j < block_end; j++) { | ||
- CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; | ||
+ vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; | ||
} | ||
} | ||
} | ||
@@ -338,7 +338,7 @@ static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core | ||
abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); | ||
} | ||
for (CeedInt j = 0; j < block_end; j++) { | ||
- CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; | ||
+ vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; | ||
} | ||
for (n = 1; n < elem_size - 1; n++) { | ||
CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { | ||
@@ -350,7 +350,7 @@ static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core | ||
abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); | ||
} | ||
for (CeedInt j = 0; j < block_end; j++) { | ||
- CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; | ||
+ vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; | ||
} | ||
} | ||
CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { | ||
@@ -360,7 +360,7 @@ static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core | ||
abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); | ||
} | ||
for (CeedInt j = 0; j < block_end; j++) { | ||
- CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; | ||
+ vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; | ||
} | ||
} | ||
} | ||
diff --git a/backends/xsmm/ceed-xsmm-tensor.c b/backends/xsmm/ceed-xsmm-tensor.c | ||
index a64f5e9f..555352ef 100644 | ||
--- a/backends/xsmm/ceed-xsmm-tensor.c | ||
+++ b/backends/xsmm/ceed-xsmm-tensor.c | ||
@@ -30,7 +30,7 @@ static int CeedTensorContractApply_Xsmm(CeedTensorContract contract, CeedInt A, | ||
LIBXSMM_DATATYPE_F64, LIBXSMM_DATATYPE_F64) | ||
: libxsmm_create_gemm_shape(J, A, B, !t_mode ? B : J, B, J, LIBXSMM_DATATYPE_F32, LIBXSMM_DATATYPE_F32, | ||
LIBXSMM_DATATYPE_F32, LIBXSMM_DATATYPE_F32); | ||
- const libxsmm_gemmfunction kernel = libxsmm_dispatch_gemm_v2(gemm_shape, (libxsmm_bitfield)(flags), (libxsmm_bitfield)LIBXSMM_GEMM_PREFETCH_NONE); | ||
+ const libxsmm_gemmfunction kernel = libxsmm_dispatch_gemm(gemm_shape, (libxsmm_bitfield)(flags), (libxsmm_bitfield)LIBXSMM_GEMM_PREFETCH_NONE); | ||
libxsmm_gemm_param gemm_param; | ||
|
||
CeedCheck(kernel, ceed, CEED_ERROR_BACKEND, "LIBXSMM kernel failed to build."); | ||
@@ -50,7 +50,7 @@ static int CeedTensorContractApply_Xsmm(CeedTensorContract contract, CeedInt A, | ||
LIBXSMM_DATATYPE_F64, LIBXSMM_DATATYPE_F64) | ||
: libxsmm_create_gemm_shape(C, J, B, C, !t_mode ? B : J, C, LIBXSMM_DATATYPE_F32, LIBXSMM_DATATYPE_F32, | ||
LIBXSMM_DATATYPE_F32, LIBXSMM_DATATYPE_F32); | ||
- const libxsmm_gemmfunction kernel = libxsmm_dispatch_gemm_v2(gemm_shape, (libxsmm_bitfield)(flags), (libxsmm_bitfield)LIBXSMM_GEMM_PREFETCH_NONE); | ||
+ const libxsmm_gemmfunction kernel = libxsmm_dispatch_gemm(gemm_shape, (libxsmm_bitfield)(flags), (libxsmm_bitfield)LIBXSMM_GEMM_PREFETCH_NONE); | ||
libxsmm_gemm_param gemm_param; | ||
|
||
CeedCheck(kernel, ceed, CEED_ERROR_BACKEND, "LIBXSMM kernel failed to build."); |
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.