diff --git a/src/collectives.c b/src/collectives.c index ee51f869..586179a8 100644 --- a/src/collectives.c +++ b/src/collectives.c @@ -613,8 +613,19 @@ shmem_internal_op_to_all_linear(void *target, const void *source, size_t count, SHMEM_WAIT_UNTIL(pSync, SHMEM_CMP_EQ, 0); /* send data, ack, and wait for completion */ +#ifdef DISABLE_NONFETCH_AMO + /* FIXME: Temporary workaround for a known issue with non-fetching AMOs on the CXI provider: issue one + fetching atomic per element, with tmp_fetch as the fetch destination. NOTE(review): completion is not passed to these calls; confirm whether a separate wait on the fetches is needed. */ + unsigned long long tmp_fetch = 0; + for (size_t i =0; i < count; i++) { + shmem_internal_fetch_atomic(SHMEM_CTX_DEFAULT, ((uint8_t *) target) + (i * type_size), + ((uint8_t *) source) + (i * type_size), &tmp_fetch, type_size, + PE_start, op, datatype); + } +#else shmem_internal_atomicv(SHMEM_CTX_DEFAULT, target, source, count * type_size, PE_start, op, datatype, &completion); +#endif shmem_internal_put_wait(SHMEM_CTX_DEFAULT, &completion); shmem_internal_fence(SHMEM_CTX_DEFAULT); @@ -817,10 +828,21 @@ shmem_internal_op_to_all_tree(void *target, const void *source, size_t count, si SHMEM_WAIT_UNTIL(pSync + 1, SHMEM_CMP_EQ, 0); /* send data, ack, and wait for completion */ +#ifdef DISABLE_NONFETCH_AMO + /* FIXME: Temporary workaround for a known issue with non-fetching AMOs on the CXI provider: issue one + fetching atomic per element, with tmp_fetch as the fetch destination. NOTE(review): completion is not passed to these calls; confirm whether a separate wait on the fetches is needed. */ + unsigned long long tmp_fetch = 0; + for (size_t i = 0; i < count; i++) { + shmem_internal_fetch_atomic(SHMEM_CTX_DEFAULT, ((uint8_t *) target) + (i * type_size), + (num_children == 0) ? ((uint8_t *) source) + (i * type_size) : ((uint8_t *) target) + (i * type_size), + &tmp_fetch, type_size, parent, op, datatype); + } +#else shmem_internal_atomicv(SHMEM_CTX_DEFAULT, target, (num_children == 0) ? 
source : target, count * type_size, parent, op, datatype, &completion); +#endif shmem_internal_put_wait(SHMEM_CTX_DEFAULT, &completion); shmem_internal_fence(SHMEM_CTX_DEFAULT); diff --git a/src/shmem_comm.h b/src/shmem_comm.h index 6143a38b..bf44ec3e 100644 --- a/src/shmem_comm.h +++ b/src/shmem_comm.h @@ -245,6 +245,8 @@ shmem_internal_atomic(shmem_ctx_t ctx, void *target, const void *source, size_t shmem_shr_transport_atomic(ctx, target, source, len, pe, op, datatype); } else { #ifdef DISABLE_NONFETCH_AMO + /* FIXME: Temporary workaround for a known issue with non-fetching AMOs on the CXI provider: + use a fetching atomic instead, with tmp_fetch as the fetch destination. */ unsigned long long tmp_fetch = 0; shmem_transport_fetch_atomic((shmem_transport_ctx_t *)ctx, target, source, &tmp_fetch, len, pe, op, datatype); @@ -284,6 +286,8 @@ shmem_internal_atomic_set(shmem_ctx_t ctx, void *target, const void *source, siz shmem_shr_transport_atomic_set(ctx, target, source, len, pe, datatype); } else { #ifdef DISABLE_NONFETCH_AMO + /* FIXME: Temporary workaround for a known issue with non-fetching AMOs on the CXI provider: + use a fetching FI_ATOMIC_WRITE instead, with tmp_fetch as the fetch destination. */ unsigned long long tmp_fetch = 0; shmem_transport_fetch_atomic((shmem_transport_ctx_t *)ctx, target, source, &tmp_fetch, len, pe, FI_ATOMIC_WRITE, datatype);