From e58b44852eb6ba0474b44c7064a44b31ff5f7c28 Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Fri, 10 Jan 2025 10:23:26 +0000 Subject: [PATCH 1/2] fix SP for GPU OpenACC SCC-STACK manually implemented variant --- src/cloudsc_gpu/cloudsc_gpu_scc_stack_mod.F90 | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/cloudsc_gpu/cloudsc_gpu_scc_stack_mod.F90 b/src/cloudsc_gpu/cloudsc_gpu_scc_stack_mod.F90 index 2eb1b8a7..c1d7d55c 100644 --- a/src/cloudsc_gpu/cloudsc_gpu_scc_stack_mod.F90 +++ b/src/cloudsc_gpu/cloudsc_gpu_scc_stack_mod.F90 @@ -674,41 +674,41 @@ SUBROUTINE CLOUDSC_SCC_STACK (KIDIA, KFDIA, KLON, KLEV, PTSPHY, PT, PQ, TENDENCY POINTER(IP_ZFOEELIQT, ZFOEELIQT) YLSTACK_L = YDSTACK_L IP_ZFOEALFA = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*(KLEV + 1)*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*(KLEV + 1)*C_SIZEOF(REAL(1, kind=JPRB)) IP_ZTP1 = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*KLEV*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*KLEV*C_SIZEOF(REAL(1, kind=JPRB)) IP_ZLI = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*KLEV*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*KLEV*C_SIZEOF(REAL(1, kind=JPRB)) IP_ZA = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*KLEV*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*KLEV*C_SIZEOF(REAL(1, kind=JPRB)) IP_ZAORIG = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*KLEV*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*KLEV*C_SIZEOF(REAL(1, kind=JPRB)) IP_ZLIQFRAC = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*KLEV*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*KLEV*C_SIZEOF(REAL(1, kind=JPRB)) IP_ZICEFRAC = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*KLEV*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*KLEV*C_SIZEOF(REAL(1, kind=JPRB)) IP_ZQX = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*KLEV*NCLV*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*KLEV*NCLV*C_SIZEOF(REAL(1, kind=JPRB)) IP_ZQX0 = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*KLEV*NCLV*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*KLEV*NCLV*C_SIZEOF(REAL(1, kind=JPRB)) IP_ZPFPLSX = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*(KLEV + 1)*NCLV*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*(KLEV + 1)*NCLV*C_SIZEOF(REAL(1, kind=JPRB)) IP_ZLNEG = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*KLEV*NCLV*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*KLEV*NCLV*C_SIZEOF(REAL(1, kind=JPRB)) IP_ZQXN2D = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*KLEV*NCLV*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*KLEV*NCLV*C_SIZEOF(REAL(1, kind=JPRB)) IP_ZQSMIX = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*KLEV*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*KLEV*C_SIZEOF(REAL(1, kind=JPRB)) IP_ZQSLIQ = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*KLEV*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*KLEV*C_SIZEOF(REAL(1, kind=JPRB)) IP_ZQSICE = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*KLEV*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*KLEV*C_SIZEOF(REAL(1, kind=JPRB)) IP_ZFOEEWMT = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*KLEV*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*KLEV*C_SIZEOF(REAL(1, kind=JPRB)) IP_ZFOEEW = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*KLEV*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*KLEV*C_SIZEOF(REAL(1, kind=JPRB)) IP_ZFOEELIQT = YLSTACK_L - YLSTACK_L = YLSTACK_L + KLON*KLEV*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + YLSTACK_L = YLSTACK_L + KLON*KLEV*C_SIZEOF(REAL(1, kind=JPRB)) !$acc data present( PT, PQ, TENDENCY_TMP_T, TENDENCY_TMP_Q, TENDENCY_TMP_A, TENDENCY_TMP_CLD, TENDENCY_LOC_T, TENDENCY_LOC_Q, & !$acc & TENDENCY_LOC_A, TENDENCY_LOC_CLD, PVFA, PVFL, PVFI, PDYNA, PDYNL, PDYNI, PHRSW, PHRLW, PVERVEL, PAP, PAPH, PLSM, LDCUM, & !$acc & KTYPE, PLU, PLUDE, PSNDE, PMFU, PMFD, PA, PCLV, PSUPSAT, PLCRIT_AER, PICRIT_AER, PRE_ICE, PCCN, PNICE, PCOVPTOT, & From 7b211ead9c5ef3a31c3a5105c784c5cae15cf4a1 Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Mon, 20 Jan 2025 12:16:28 +0000 Subject: [PATCH 2/2] SCC-STACK (single-precision) adapt size of stack as well --- src/cloudsc_gpu/cloudsc_driver_gpu_scc_stack_mod.F90 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/cloudsc_gpu/cloudsc_driver_gpu_scc_stack_mod.F90 b/src/cloudsc_gpu/cloudsc_driver_gpu_scc_stack_mod.F90 index cf05cc5f..9a7dad23 100644 --- a/src/cloudsc_gpu/cloudsc_driver_gpu_scc_stack_mod.F90 +++ b/src/cloudsc_gpu/cloudsc_driver_gpu_scc_stack_mod.F90 @@ -97,12 +97,12 @@ SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_STACK (NUMOMP, NPROMA, NLEV, NGPTOT, NGPTOTG, INTEGER(KIND=8) :: ISTSZ REAL(KIND=JPRB), ALLOCATABLE :: ZSTACK(:, :) INTEGER(KIND=8) :: YLSTACK_L - ISTSZ = (NPROMA*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + 13*NLEV*NPROMA*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + & - & 5*NLEV*NPROMA*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8)*NCLV + NPROMA*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8)*NCLV) / & - & MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) - IF (.not.(MOD(NPROMA*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + 13*NLEV*NPROMA*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8) + & - & 5*NLEV*NPROMA*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8)*NCLV + NPROMA*MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8)*NCLV, & - & MAX(C_SIZEOF(REAL(1, kind=JPRB)), 8)) == 0)) ISTSZ = ISTSZ + 1 + ISTSZ = (NPROMA*C_SIZEOF(REAL(1, kind=JPRB)) + 13*NLEV*NPROMA*C_SIZEOF(REAL(1, kind=JPRB)) + & + & 5*NLEV*NPROMA*C_SIZEOF(REAL(1, kind=JPRB))*NCLV + NPROMA*C_SIZEOF(REAL(1, kind=JPRB))*NCLV) / & + & C_SIZEOF(REAL(1, kind=JPRB)) + IF (.not.(MOD(NPROMA*C_SIZEOF(REAL(1, kind=JPRB)) + 13*NLEV*NPROMA*C_SIZEOF(REAL(1, kind=JPRB)) + & + & 5*NLEV*NPROMA*C_SIZEOF(REAL(1, kind=JPRB))*NCLV + NPROMA*C_SIZEOF(REAL(1, kind=JPRB))*NCLV, & + & C_SIZEOF(REAL(1, kind=JPRB))) == 0)) ISTSZ = ISTSZ + 1 ALLOCATE (ZSTACK(ISTSZ, NGPBLKS)) !$acc data create( ZSTACK )