diff --git a/lib/color_spinor_util.in.cu b/lib/color_spinor_util.in.cu index a268a4b16a..71664614cf 100644 --- a/lib/color_spinor_util.in.cu +++ b/lib/color_spinor_util.in.cu @@ -378,8 +378,6 @@ namespace quda { void genericPrintVector(const ColorSpinorField &a, int parity, unsigned int x_cb, int rank) { - if (rank != comm_rank()) return; - ColorSpinorParam param(a); param.location = QUDA_CPU_FIELD_LOCATION; param.create = QUDA_COPY_FIELD_CREATE; @@ -388,6 +386,8 @@ namespace quda { std::unique_ptr clone_a = !host_clone ? nullptr : std::make_unique(param); const ColorSpinorField &a_ = !host_clone ? a : *clone_a.get(); + if (rank != comm_rank()) return; // rank returns after potential copy to host to prevent tuning hang + switch (a.Precision()) { case QUDA_DOUBLE_PRECISION: genericPrintVector(a_, parity, x_cb); break; case QUDA_SINGLE_PRECISION: genericPrintVector(a_, parity, x_cb); break; diff --git a/lib/gauge_norm.in.cu b/lib/gauge_norm.in.cu index 08cb5bf2ca..ddf59287f4 100644 --- a/lib/gauge_norm.in.cu +++ b/lib/gauge_norm.in.cu @@ -159,8 +159,6 @@ namespace quda { void genericPrintMatrix(const GaugeField &a, int d, int parity, unsigned int x_cb, int rank) { - if (rank != comm_rank()) return; - GaugeFieldParam param(a); param.field = const_cast(&a); param.location = QUDA_CPU_FIELD_LOCATION; @@ -172,6 +170,8 @@ namespace quda { std::unique_ptr clone_a = !host_clone ? nullptr : std::make_unique(param); const GaugeField &a_ = !host_clone ? a : *clone_a.get(); + if (rank != comm_rank()) return; // rank returns after potential copy to host to prevent tuning hang + switch (a.Precision()) { case QUDA_DOUBLE_PRECISION: genericPrintMatrix(a_, d, parity, x_cb); break; case QUDA_SINGLE_PRECISION: genericPrintMatrix(a_, d, parity, x_cb); break;