diff --git a/base/configs/host.yaml b/base/configs/host.yaml
index 6dfc82c34..b14e17a73 100755
--- a/base/configs/host.yaml
+++ b/base/configs/host.yaml
@@ -79,4 +79,16 @@ CASES:
     # metax "main_memory-capacity:C550": "pytorch_2.0"
     # metax "computation-FP64:C550": "pytorch_2.0"
 
-    # kunlunxin "main_memory-capacity:R300p": "xpytorch029"
+# kunlunxin "interconnect-MPI_intraserver:R300p": "pytorch_2.0"
+# kunlunxin "interconnect-P2P_intraserver:R300p": "pytorch_2.0"
+# kunlunxin "interconnect-MPI_interserver:R300p": "pytorch_2.0"
+# kunlunxin "interconnect-P2P_interserver:R300p": "pytorch_2.0"
+# kunlunxin "interconnect-h2d:R300p": "pytorch_2.0"
+# kunlunxin "main_memory-bandwidth:R300p": "pytorch_2.0"
+# kunlunxin "main_memory-capacity:R300p": "pytorch_2.0"
+# kunlunxin "computation-FP32:R300p": "pytorch_2.0"
+# kunlunxin "computation-FP16:R300p": "pytorch_2.0"
+# kunlunxin "computation-BF16:R300p": "pytorch_2.0"
+# kunlunxin "computation-INT8:R300p": "pytorch_2.0"
+# kunlunxin "computation-TF32:R300p": "pytorch_2.0"
+
diff --git a/base/toolkits/computation-BF16/kunlunxin/R300p/main.sh b/base/toolkits/computation-BF16/kunlunxin/R300p/main.sh
new file mode 100644
index 000000000..c8f525945
--- /dev/null
+++ b/base/toolkits/computation-BF16/kunlunxin/R300p/main.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Builds and runs ./gemm from /opt/util/examples/<case>, where <case> is the
+# name of the directory two levels above the working directory.
+
+CASE=$(readlink -f ../.. | awk -F/ '{print $NF}')
+# Stop here if the example directory is missing so make never runs elsewhere.
+pushd "/opt/util/examples/$CASE" || exit 1
+
+make clean
+
+export XRE_PATH=/opt/xre
+export XBLAS_PATH=/opt/xhpc/xblas
+export CXX=g++
+export XTDK_PATH=/opt/xtdk/
+export LINK_TYPE=dynamic
+
+make && ./gemm
+make clean
+
+popd
diff --git a/base/toolkits/computation-FP16/kunlunxin/R300p/main.sh b/base/toolkits/computation-FP16/kunlunxin/R300p/main.sh
new file mode 100644
index 000000000..5387c88c9
--- /dev/null
+++ b/base/toolkits/computation-FP16/kunlunxin/R300p/main.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Builds and runs ./gemm from /opt/util/examples/<case>, where <case> is the
+# name of the directory two levels above the working directory.
+
+CASE=$(readlink -f ../.. | awk -F/ '{print $NF}')
+# Stop here if the example directory is missing so make never runs elsewhere.
+pushd "/opt/util/examples/$CASE" || exit 1
+
+make clean
+
+export XRE_PATH=/opt/xre
+export XBLAS_PATH=/opt/xhpc/xblas
+export CXX=g++
+export XTDK_PATH=/opt/xtdk/
+export LINK_TYPE=dynamic
+
+make && ./gemm
+make clean
+
+popd
diff --git a/base/toolkits/computation-FP32/kunlunxin/R300p/main.sh b/base/toolkits/computation-FP32/kunlunxin/R300p/main.sh
new file mode 100644
index 000000000..c8f525945
--- /dev/null
+++ b/base/toolkits/computation-FP32/kunlunxin/R300p/main.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Builds and runs ./gemm from /opt/util/examples/<case>, where <case> is the
+# name of the directory two levels above the working directory.
+
+CASE=$(readlink -f ../.. | awk -F/ '{print $NF}')
+# Stop here if the example directory is missing so make never runs elsewhere.
+pushd "/opt/util/examples/$CASE" || exit 1
+
+make clean
+
+export XRE_PATH=/opt/xre
+export XBLAS_PATH=/opt/xhpc/xblas
+export CXX=g++
+export XTDK_PATH=/opt/xtdk/
+export LINK_TYPE=dynamic
+
+make && ./gemm
+make clean
+
+popd
diff --git a/base/toolkits/computation-INT8/kunlunxin/R300p/main.sh b/base/toolkits/computation-INT8/kunlunxin/R300p/main.sh
new file mode 100644
index 000000000..c8f525945
--- /dev/null
+++ b/base/toolkits/computation-INT8/kunlunxin/R300p/main.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Builds and runs ./gemm from /opt/util/examples/<case>, where <case> is the
+# name of the directory two levels above the working directory.
+
+CASE=$(readlink -f ../.. | awk -F/ '{print $NF}')
+# Stop here if the example directory is missing so make never runs elsewhere.
+pushd "/opt/util/examples/$CASE" || exit 1
+
+make clean
+
+export XRE_PATH=/opt/xre
+export XBLAS_PATH=/opt/xhpc/xblas
+export CXX=g++
+export XTDK_PATH=/opt/xtdk/
+export LINK_TYPE=dynamic
+
+make && ./gemm
+make clean
+
+popd
diff --git a/base/toolkits/computation-TF32/kunlunxin/R300p/main.sh b/base/toolkits/computation-TF32/kunlunxin/R300p/main.sh
new file mode 100644
index 000000000..c8f525945
--- /dev/null
+++ b/base/toolkits/computation-TF32/kunlunxin/R300p/main.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Builds and runs ./gemm from /opt/util/examples/<case>, where <case> is the
+# name of the directory two levels above the working directory.
+
+CASE=$(readlink -f ../.. | awk -F/ '{print $NF}')
+# Stop here if the example directory is missing so make never runs elsewhere.
+pushd "/opt/util/examples/$CASE" || exit 1
+
+make clean
+
+export XRE_PATH=/opt/xre
+export XBLAS_PATH=/opt/xhpc/xblas
+export CXX=g++
+export XTDK_PATH=/opt/xtdk/
+export LINK_TYPE=dynamic
+
+make && ./gemm
+make clean
+
+popd
diff --git a/base/toolkits/interconnect-MPI_interserver/kunlunxin/R300p/main.sh b/base/toolkits/interconnect-MPI_interserver/kunlunxin/R300p/main.sh
new file mode 100644
index 000000000..263393cb0
--- /dev/null
+++ b/base/toolkits/interconnect-MPI_interserver/kunlunxin/R300p/main.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+# Multi-node all_reduce bandwidth test: launches /opt/xccl/perf/all_reduce
+# through mpirun. host.yaml is read via a relative path, so the working
+# directory is assumed to be the directory containing this script.
+
+set -x
+
+/etc/init.d/ssh start
+/etc/init.d/ssh status
+sleep 10
+
+# Turn the uncommented HOSTS: entry of host.yaml into "ip:8,ip:8,...".
+hosts=$(grep -Ev '^\s*#' "../../../../configs/host.yaml" | grep HOSTS: | cut -d: -f2 | perl -F, -lne '/(\d+\.\d+\.\d+\.\d+)/ && push @h,$1.":8" foreach @F; print join(",", @h)')
+# Rank count: 8 per listed host.
+n=$(($(echo $hosts | sed -e 's/,/\n/g' | wc -l)*8))
+
+TOOL=all_reduce
+LOG=_${TOOL}.log.$$
+PERF=/opt/xccl/perf/${TOOL}
+
+# FIXME: hard code hostname, need graceful impl.
+if [[ w"$HOSTNAME" != w"p-perf-kunlun-01" ]]; then
+    echo "launch mpirun only on first node, exiting."
+    exit
+fi
+
+mpirun -hosts "${hosts}" -n $n $PERF \
+    --nxpus $n \
+    --warmup_iters 20 \
+    --iters 2000 \
+    --minbytes 256m \
+    --maxbytes 256m \
+    --op_type sum \
+    --data_type float \
+    -c 0 | tee $LOG
+
+# The reported value is the last field of the final line of the tool output.
+# NOTE(review): the intra-server script reports twice this value - confirm
+# which convention is intended for the inter-server result.
+busbw=$(tail -n 1 ${LOG} | awk '{print $NF}')
+echo "[FlagPerf Result]interconnect-MPI_interserver-bandwidth=${busbw} GB/s"
+rm -f $LOG
diff --git a/base/toolkits/interconnect-MPI_intraserver/kunlunxin/R300p/main.sh b/base/toolkits/interconnect-MPI_intraserver/kunlunxin/R300p/main.sh
new file mode 100644
index 000000000..4d6f0f516
--- /dev/null
+++ b/base/toolkits/interconnect-MPI_intraserver/kunlunxin/R300p/main.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Single-node all_reduce bandwidth test: runs /opt/xccl/perf/all_reduce with
+# --nxpus 8 and reports twice the busbw value parsed from the tool output.
+
+TOOL=all_reduce
+LOG=_${TOOL}.log.$$
+PERF=/opt/xccl/perf/${TOOL}
+
+$PERF \
+    --nxpus 8 \
+    --warmup_iters 20 \
+    --iters 20000 \
+    --minbytes 128m \
+    --maxbytes 128m \
+    --op_type sum \
+    --data_type float \
+    -c 0 | tee $LOG
+
+# busbw is taken from the last field of the final line of the tool output.
+busbw=$(tail -n 1 ${LOG} | awk '{print $NF}')
+busbw_bi=$(python3 -c "print(float($busbw) * 2)")
+echo "[FlagPerf Result]interconnect-MPI_intraserver-bandwidth=${busbw_bi} GB/s"
+rm -f $LOG
diff --git a/base/toolkits/interconnect-P2P_interserver/kunlunxin/R300p/main.sh b/base/toolkits/interconnect-P2P_interserver/kunlunxin/R300p/main.sh
new file mode 100644
index 000000000..9881dad26
--- /dev/null
+++ b/base/toolkits/interconnect-P2P_interserver/kunlunxin/R300p/main.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# Cross-node sendrecv bandwidth test: launches 2 ranks of
+# /opt/xccl/perf/sendrecv through mpirun, with each host listed as "ip:1".
+# host.yaml is read via a relative path, so the working directory is assumed
+# to be the directory containing this script.
+
+set -x
+
+/etc/init.d/ssh start
+/etc/init.d/ssh status
+sleep 10
+
+# Turn the uncommented HOSTS: entry of host.yaml into "ip:1,ip:1,...".
+hosts=$(grep -Ev '^\s*#' "../../../../configs/host.yaml" | grep HOSTS: | cut -d: -f2 | perl -F, -lne '/(\d+\.\d+\.\d+\.\d+)/ && push @h,$1.":1" foreach @F; print join(",", @h)')
+
+TOOL=sendrecv
+LOG=_${TOOL}.log.$$
+PERF=/opt/xccl/perf/${TOOL}
+
+# FIXME: hard code hostname, need graceful impl.
+if [[ w"$HOSTNAME" != w"p-perf-kunlun-01" ]]; then
+    echo "launch mpirun only on first node, exiting."
+    exit
+fi
+
+mpirun -hosts "${hosts}" -n 2 $PERF \
+    --nxpus 2 \
+    --warmup_iters 20 \
+    --iters 2000 \
+    --minbytes 256m \
+    --maxbytes 256m \
+    --op_type sum \
+    --data_type float \
+    -c 0 | tee $LOG
+
+# The reported value is the last field of the final line of the tool output.
+# NOTE(review): the intra-server script reports twice this value - confirm
+# which convention is intended for the inter-server result.
+busbw=$(tail -n 1 ${LOG} | awk '{print $NF}')
+echo "[FlagPerf Result]interconnect-P2P_interserver-bandwidth=${busbw} GB/s"
+rm -f $LOG
diff --git a/base/toolkits/interconnect-P2P_intraserver/kunlunxin/R300p/main.sh b/base/toolkits/interconnect-P2P_intraserver/kunlunxin/R300p/main.sh
new file mode 100644
index 000000000..bdccbad02
--- /dev/null
+++ b/base/toolkits/interconnect-P2P_intraserver/kunlunxin/R300p/main.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Single-node sendrecv bandwidth test: runs /opt/xccl/perf/sendrecv with
+# --nxpus 8 and reports twice the busbw value parsed from the tool output.
+
+TOOL=sendrecv
+LOG=_${TOOL}.log.$$
+PERF=/opt/xccl/perf/${TOOL}
+
+$PERF \
+    --nxpus 8 \
+    --warmup_iters 20 \
+    --iters 20000 \
+    --minbytes 128m \
+    --maxbytes 128m \
+    --op_type sum \
+    --data_type float \
+    -c 0 | tee $LOG
+
+# busbw is taken from the last field of the final line of the tool output.
+busbw=$(tail -n 1 ${LOG} | awk '{print $NF}')
+busbw_bi=$(python3 -c "print(float($busbw) * 2)")
+echo "[FlagPerf Result]interconnect-P2P_intraserver-bandwidth=${busbw_bi} GB/s"
+rm -f $LOG
diff --git a/base/toolkits/interconnect-h2d/kunlunxin/R300p/main.sh b/base/toolkits/interconnect-h2d/kunlunxin/R300p/main.sh
new file mode 100644
index 000000000..fa6dd10eb
--- /dev/null
+++ b/base/toolkits/interconnect-h2d/kunlunxin/R300p/main.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Host-to-device bandwidth test: runs /opt/xre/tools/test_dma with DEV=0 and
+# SIZE=1024^3 for 100 loops and reports the value parsed from the
+# HOST_TO_DEVICE section of its output.
+
+TOOL=test_dma
+LOG=_${TOOL}.log.$$
+PERF=/opt/xre/tools/$TOOL
+DEV=0
+SIZE=$((1024*1024*1024))
+
+$PERF \
+    --loop 100 \
+    $DEV \
+    $SIZE | tee $LOG
+
+# Keep the text after ':' on the last line of the HOST_TO_DEVICE match window.
+busbw=$(grep -A 4 HOST_TO_DEVICE ${LOG} | tail -1 | cut -d: -f2 | sed -e 's/ //g')
+echo "[FlagPerf Result] interconnect-h2d bandwidth=$busbw GB/s"
+rm -f $LOG
diff --git a/base/toolkits/main_memory-bandwidth/kunlunxin/R300p/main.sh b/base/toolkits/main_memory-bandwidth/kunlunxin/R300p/main.sh
new file mode 100644
index 000000000..1e17632e1
--- /dev/null
+++ b/base/toolkits/main_memory-bandwidth/kunlunxin/R300p/main.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Builds and runs ./bandwidth from /opt/util/examples/<case>, where <case> is
+# the name of the directory two levels above the working directory.
+
+CASE=$(readlink -f ../.. | awk -F/ '{print $NF}')
+# Stop here if the example directory is missing so make never runs elsewhere.
+pushd "/opt/util/examples/$CASE" || exit 1
+
+make clean
+
+export XRE_PATH=/opt/xre
+export XDNN_PATH=/opt/xhpc/xdnn
+export CXX=g++
+export XTDK_PATH=/opt/xtdk/
+export LINK_TYPE=dynamic
+
+make && ./bandwidth
+make clean
+
+popd
diff --git a/base/toolkits/main_memory-capacity/kunlunxin/R300p/main.sh b/base/toolkits/main_memory-capacity/kunlunxin/R300p/main.sh
new file mode 100644
index 000000000..96668a85b
--- /dev/null
+++ b/base/toolkits/main_memory-capacity/kunlunxin/R300p/main.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# Reports field 19 of the first line of `xpu-smi -m` output as the memory
+# capacity in MiB.
+
+TOOL=xpu-smi
+PERF=/opt/xre/bin/$TOOL
+
+mem=$($PERF -m | head -1 | awk '{print $19}')
+echo "[FlagPerf Result] main_memory-capacity=$mem MiB"
diff --git a/base/vendors/kunlunxin/pytorch_2.0/Dockerfile b/base/vendors/kunlunxin/pytorch_2.0/Dockerfile
new file mode 100644
index 000000000..657fd43e0
--- /dev/null
+++ b/base/vendors/kunlunxin/pytorch_2.0/Dockerfile
@@ -0,0 +1,13 @@
+# NOTE(review): the base image is referenced by the mutable :latest tag; pin
+# a versioned tag or digest once one is available for reproducible builds.
+FROM klx-flagperf-base:latest
+RUN /bin/bash -c "pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple"
+RUN /bin/bash -c "uname -a"
+# Interpreter selection is done through PATH: a shell alias defined in a RUN
+# step would not persist into later layers or the running container.
+ENV PATH=/root/miniconda/envs/python38_torch201_cuda/bin:$PATH
+
+RUN pip3 install --no-cache-dir \
+    loguru \
+    munch \
+    schedule
diff --git a/base/vendors/kunlunxin/pytorch_2.0/pytorch_2.0_install.sh b/base/vendors/kunlunxin/pytorch_2.0/pytorch_2.0_install.sh
new file mode 100644
index 000000000..e97ff42e0
--- /dev/null
+++ b/base/vendors/kunlunxin/pytorch_2.0/pytorch_2.0_install.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Installs Python helper packages, starts the ssh service, runs the XPyTorch
+# installer and then two torch sanity checks.
+
+set -x
+
+pip install pytest loguru schedule
+
+/etc/init.d/ssh start
+
+# xpytorch install
+cd /opt/xpytorch && bash xpytorch-cp38-torch201-ubuntu2004-x64.run
+CUDART_DUMMY_REGISTER=1 python -m torch_xmlir --doctor
+CUDART_DUMMY_REGISTER=1 python -c "import torch; print(torch.rand(512, 128).cuda())"
+