From 308cf7dca0db1f78d83a4269c436d098314ecc03 Mon Sep 17 00:00:00 2001 From: "Xinsheng (Shawn) Qin" Date: Thu, 6 Feb 2020 20:49:51 -0800 Subject: [PATCH 1/4] update cuda version for doppio to use cuda 10.0 --- src/Makefile.common.GPU | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Makefile.common.GPU b/src/Makefile.common.GPU index fe1c7ce..3a990b2 100644 --- a/src/Makefile.common.GPU +++ b/src/Makefile.common.GPU @@ -152,9 +152,9 @@ ifeq ($(USE_CUDA),TRUE) ALL_LFLAGS += -Mcuda=cuda9.1,cc35,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc ALL_CPPFLAGS += -Mcuda=cuda9.1,cc35,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc else ifeq ($(machine_name), doppio) - ALL_FFLAGS += -Mcuda=cuda9.2,cc60,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - ALL_LFLAGS += -Mcuda=cuda9.2,cc60,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - ALL_CPPFLAGS += -Mcuda=cuda9.2,cc60,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc + ALL_FFLAGS += -Mcuda=cuda10.0,cc60,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc + ALL_LFLAGS += -Mcuda=cuda10.0,cc60,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc + ALL_CPPFLAGS += -Mcuda=cuda10.0,cc60,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc else ifeq ($(machine_name), rhea) ALL_FFLAGS += -Mcuda=cuda8.0,cc35,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc ALL_LFLAGS += -Mcuda=cuda8.0,cc35,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc From 3422408b2b51d79658224af2a7ab1ef785bfa396 Mon Sep 17 00:00:00 2001 From: "Xinsheng (Shawn) Qin" Date: Thu, 6 Feb 2020 22:18:06 -0800 Subject: [PATCH 2/4] use environment variable CUDA_VERSION to set which version of CUDA to use in Makefile --- src/Makefile.common.GPU | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Makefile.common.GPU b/src/Makefile.common.GPU index 3a990b2..9e138a3 100644 --- a/src/Makefile.common.GPU +++ b/src/Makefile.common.GPU @@ -41,6 +41,8 @@ endif # CUDA nvcc compiler CLAW_NVCC ?= $(CUDA_PATH)/bin/nvcc +CUDA_VER ?= $(CUDA_VERSION) + LINK ?= $(CLAW_FC) @@ -160,9 +162,9 @@ ifeq 
($(USE_CUDA),TRUE) ALL_LFLAGS += -Mcuda=cuda8.0,cc35,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc ALL_CPPFLAGS += -Mcuda=cuda8.0,cc35,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc else # default compilation flags if the machine is none of the above - ALL_FFLAGS += -Mcuda=cuda9.2,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - ALL_LFLAGS += -Mcuda=cuda9.2,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - ALL_CPPFLAGS += -Mcuda=cuda9.2,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc + ALL_FFLAGS += -Mcuda=$(CUDA_VER),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc + ALL_LFLAGS += -Mcuda=$(CUDA_VER),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc + ALL_CPPFLAGS += -Mcuda=$(CUDA_VER),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc endif ALL_LIBRARIES += -lcudafor -lcuda ifeq ($(VERBOSE), TRUE) From 2bc51c780e15e1e2e73de5bbd93a42e927c29d38 Mon Sep 17 00:00:00 2001 From: "Xinsheng (Shawn) Qin" Date: Mon, 10 Feb 2020 20:51:36 -0800 Subject: [PATCH 3/4] Let Makefile.common.GPU use bash environment variables CUDA_VERSION and CUDA_CC. CUDA_VERSION should be set to the version "nvcc --version" returns. CUDA_CC should be set to the compute capability supported by the machine you compile this code on. The users are expected to set the two environment variables. 
They will be passed to the compiler as compilation flags: ALL_FFLAGS += -Mcuda=cuda$(CUDA_VERSION),$(CUDA_CC) --- src/Makefile.common.GPU | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/Makefile.common.GPU b/src/Makefile.common.GPU index 9e138a3..82e4d82 100644 --- a/src/Makefile.common.GPU +++ b/src/Makefile.common.GPU @@ -41,9 +41,6 @@ endif # CUDA nvcc compiler CLAW_NVCC ?= $(CUDA_PATH)/bin/nvcc -CUDA_VER ?= $(CUDA_VERSION) - - LINK ?= $(CLAW_FC) # Path to version of python to use: May need to use something other than @@ -150,21 +147,26 @@ ifeq ($(USE_CUDA),TRUE) endif ALL_DEFINES += -DCUDA ifeq ($(machine_name), titan) - ALL_FFLAGS += -Mcuda=cuda9.1,cc35,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - ALL_LFLAGS += -Mcuda=cuda9.1,cc35,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - ALL_CPPFLAGS += -Mcuda=cuda9.1,cc35,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc + CUDA_VERSION ?= 9.1 + CUDA_CC ?= cc35 else ifeq ($(machine_name), doppio) - ALL_FFLAGS += -Mcuda=cuda10.0,cc60,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - ALL_LFLAGS += -Mcuda=cuda10.0,cc60,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - ALL_CPPFLAGS += -Mcuda=cuda10.0,cc60,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc + CUDA_VERSION ?= 10.0 + CUDA_CC ?= cc60 else ifeq ($(machine_name), rhea) - ALL_FFLAGS += -Mcuda=cuda8.0,cc35,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - ALL_LFLAGS += -Mcuda=cuda8.0,cc35,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - ALL_CPPFLAGS += -Mcuda=cuda8.0,cc35,maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc + CUDA_VERSION ?= 8.0 + CUDA_CC ?= cc35 else # default compilation flags if the machine is none of the above - ALL_FFLAGS += -Mcuda=$(CUDA_VER),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - ALL_LFLAGS += -Mcuda=$(CUDA_VER),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - ALL_CPPFLAGS += -Mcuda=$(CUDA_VER),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc + CUDA_VERSION ?= 10.0 + endif + ifeq ($(origin CUDA_CC),undefined) # check if CUDA_CC is defined in bash 
environment + # don't specify compute capability if CUDA_CC is not set + ALL_FFLAGS += -Mcuda=cuda$(CUDA_VERSION),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc + ALL_LFLAGS += -Mcuda=cuda$(CUDA_VERSION),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc + ALL_CPPFLAGS += -Mcuda=cuda$(CUDA_VERSION),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc + else + ALL_FFLAGS += -Mcuda=cuda$(CUDA_VERSION),$(CUDA_CC),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc + ALL_LFLAGS += -Mcuda=cuda$(CUDA_VERSION),$(CUDA_CC),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc + ALL_CPPFLAGS += -Mcuda=cuda$(CUDA_VERSION),$(CUDA_CC),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc endif ALL_LIBRARIES += -lcudafor -lcuda ifeq ($(VERBOSE), TRUE) From b02122e4eb636722e4ff898472312dbf5ebcb550 Mon Sep 17 00:00:00 2001 From: "Xinsheng (Shawn) Qin" Date: Tue, 3 Mar 2020 00:32:23 -0800 Subject: [PATCH 4/4] update Makefile.common.GPU to use CUDA-relevant variables from a local Makefile --- src/Makefile.common.GPU | 83 ++++++----------------------------------- 1 file changed, 12 insertions(+), 71 deletions(-) diff --git a/src/Makefile.common.GPU b/src/Makefile.common.GPU index 82e4d82..5c44e45 100644 --- a/src/Makefile.common.GPU +++ b/src/Makefile.common.GPU @@ -1,39 +1,15 @@ # CPP compiler CC ?= g++ -# for using some pre-set compiler options on specific machines -# you can add your own here -host_name := $(shell hostname -f) -ifeq ($(findstring titan, $(host_name)), titan) - machine_name := titan -else ifeq ($(findstring doppio, $(host_name)), doppio) - machine_name := doppio -else ifeq ($(findstring rhea, $(host_name)), rhea) - machine_name := rhea -endif - - # if FC is set to pgi, we use pgi compiler ifeq ($(FC),pgi) - ifeq ($(machine_name), titan) - CLAW_FC ?= ftn - else ifeq ($(machine_name), rhea) - CLAW_FC ?= pgfortran - else - CLAW_FC ?= pgfortran - endif + CLAW_FC ?= pgfortran else CLAW_FC ?= $(FC) endif # if CC is set to pgi, we use pgi compiler ifeq ($(CC),pgi) - ifeq ($(machine_name), titan) - CLAW_CC ?= CC - else ifeq 
($(machine_name), rhea) - CLAW_CC ?= pgc++ - else - CLAW_CC ?= pgc++ - endif + CLAW_CC ?= pgc++ else CLAW_CC ?= $(CC) endif @@ -128,7 +104,7 @@ ALL_INCLUDE += $(addprefix -I,$(INCLUDE)) ALL_INCLUDE += $(addprefix -I,$(MODULE_PATHS)) $(addprefix -L,$(LIB_PATHS)) ifeq ($(USE_CUDA), TRUE) ifndef CUDA_PATH - $(error CUDA_PATH Environment variable is undefined) + $(error CUDA_PATH is not defined) endif ALL_INCLUDE += -I$(CUDA_PATH)/include ALL_LIBRARIES += -L$(CUDA_PATH)/lib64 @@ -146,28 +122,9 @@ ifeq ($(USE_CUDA),TRUE) $(error You must set FC to pgi to use CUDA) endif ALL_DEFINES += -DCUDA - ifeq ($(machine_name), titan) - CUDA_VERSION ?= 9.1 - CUDA_CC ?= cc35 - else ifeq ($(machine_name), doppio) - CUDA_VERSION ?= 10.0 - CUDA_CC ?= cc60 - else ifeq ($(machine_name), rhea) - CUDA_VERSION ?= 8.0 - CUDA_CC ?= cc35 - else # default compilation flags if the machine is none of the above - CUDA_VERSION ?= 10.0 - endif - ifeq ($(origin CUDA_CC),undefined) # check if CUDA_CC is defined in bash environment - # don't specify compute capability if CUDA_CC is not set - ALL_FFLAGS += -Mcuda=cuda$(CUDA_VERSION),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - ALL_LFLAGS += -Mcuda=cuda$(CUDA_VERSION),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - ALL_CPPFLAGS += -Mcuda=cuda$(CUDA_VERSION),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - else - ALL_FFLAGS += -Mcuda=cuda$(CUDA_VERSION),$(CUDA_CC),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - ALL_LFLAGS += -Mcuda=cuda$(CUDA_VERSION),$(CUDA_CC),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - ALL_CPPFLAGS += -Mcuda=cuda$(CUDA_VERSION),$(CUDA_CC),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc - endif + ALL_FFLAGS += -Mcuda=cuda$(CUDA_VERSION),$(CUDA_CC),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc + ALL_LFLAGS += -Mcuda=cuda$(CUDA_VERSION),$(CUDA_CC),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc + ALL_CPPFLAGS += -Mcuda=cuda$(CUDA_VERSION),$(CUDA_CC),maxregcount:80 -Mcuda=lineinfo -Mcuda=rdc ALL_LIBRARIES += -lcudafor -lcuda ifeq ($(VERBOSE), TRUE) ALL_FFLAGS += 
-Mcuda=ptxinfo @@ -257,7 +214,7 @@ else ifeq ($(CLAW_FC),ifort) MODULE_FLAG = -module OMP_FLAG = -openmp # TODO: add flags for the intel compiler -else ifeq ($(CLAW_FC),$(filter $(CLAW_FC),pgfortran ftn)) # CLAW_FC is either pgfortran or ftn +else ifeq ($(CLAW_FC),pgfortran) # Note that there shoud be a space after this flag MODULE_FLAG = -module ifeq ($(DEBUG),TRUE) @@ -273,9 +230,6 @@ else ifeq ($(CLAW_FC),$(filter $(CLAW_FC),pgfortran ftn)) # CLAW_FC is either pg ifeq ($(VERBOSE), TRUE) ALL_FFLAGS += -Manno -Mkeepasm endif - ifeq ($(machine_name), rhea) - ALL_LIBRARIES += -L/ccs/compilers/gcc/rhel6-x86_64/5.3.0/lib64 - endif else $(error Unrecognized compiler: $(CLAW_FC)) endif @@ -304,19 +258,11 @@ ifeq ($(USE_CUDA), TRUE) ifneq ($(DEBUG),TRUE) ALL_NVCCFLAGS += -O3 -Xcompiler='-O3' endif - ifeq ($(machine_name), titan) - ALL_NVCCFLAGS += --generate-code arch=compute_35,code=compute_35 - # below are equivalant to the line above - # ALL_NVCCFLAGS += --gpu-architecture=compute_35 - # ALL_NVCCFLAGS += --gpu-code=sm_35 - else ifeq ($(machine_name), doppio) - ALL_NVCCFLAGS += --generate-code arch=compute_60,code=compute_60 - ALL_NVCCFLAGS += --generate-code arch=compute_61,code=compute_61 - else ifeq ($(machine_name), rhea) - ALL_NVCCFLAGS += --generate-code arch=compute_35,code=compute_35 - else - ALL_NVCCFLAGS += --generate-code arch=compute_35,code=compute_35 - endif + # uncomment these lines and change them accordingly to obtain potentially + # better optimization if you know the CUDA compute capability for the + # machine this code is compiled for + # ALL_NVCCFLAGS += --generate-code arch=compute_60,code=compute_60 + # ALL_NVCCFLAGS += --generate-code arch=compute_61,code=compute_61 ifeq ($(VERBOSE), TRUE) ALL_NVCCFLAGS += -keep --resource-usage -Xptxas=--verbose ALL_NVCCFLAGS += -keep-dir built.tmp @@ -330,11 +276,6 @@ $(shell mkdir -p built.tmp) endif endif -ifeq ($(machine_name), doppio) - # TODO: this is just a workaround for imcompatibility of pgi 18.4 and 
ubuntu 18.4 on Doppio - # We should get rid of this eventually - ALL_CPPFLAGS += -D__CUDACC__ -endif # We may want to set MAKELEVEL here as it is not always set but we know we are # not the first level (the original calling Makefile should be MAKELEVEL = 0)