Skip to content

Commit

Permalink
[KUNLUNXIN] base case add toolkits support (#746)
Browse files Browse the repository at this point in the history
Co-authored-by: w4yne <[email protected]>
  • Loading branch information
w4yne and w4yne authored Sep 14, 2024
1 parent 1c8c140 commit df9b99e
Show file tree
Hide file tree
Showing 15 changed files with 283 additions and 1 deletion.
14 changes: 13 additions & 1 deletion base/configs/host.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,16 @@ CASES:
# metax "main_memory-capacity:C550": "pytorch_2.0"
# metax "computation-FP64:C550": "pytorch_2.0"

# kunlunxin "main_memory-capacity:R300p": "xpytorch029"
# kunlunxin "interconnect-MPI_intraserver:R300p": "pytorch_2.0"
# kunlunxin "interconnect-P2P_intraserver:R300p": "pytorch_2.0"
# kunlunxin "interconnect-MPI_interserver:R300p": "pytorch_2.0"
# kunlunxin "interconnect-P2P_interserver:R300p": "pytorch_2.0"
# kunlunxin "interconnect-h2d:R300p": "pytorch_2.0"
# kunlunxin "main_memory-bandwidth:R300p": "pytorch_2.0"
# kunlunxin "main_memory-capacity:R300p": "pytorch_2.0"
# kunlunxin "computation-FP32:R300p": "pytorch_2.0"
# kunlunxin "computation-FP16:R300p": "pytorch_2.0"
# kunlunxin "computation-BF16:R300p": "pytorch_2.0"
# kunlunxin "computation-INT8:R300p": "pytorch_2.0"
# kunlunxin "computation-TF32:R300p": "pytorch_2.0"

17 changes: 17 additions & 0 deletions base/toolkits/computation-BF16/kunlunxin/R300p/main.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
#
# Builds the vendor example for this toolkit case with make, runs the
# resulting ./gemm binary, and cleans the build tree again.
#
# The case name is the last path component of the directory two levels above
# the current working directory, so the script has to be started from the
# directory that contains it.
# NOTE(review): assumes the harness cd's here before invoking - confirm.
#
# Exit status: non-zero when the case name cannot be derived, the example
# directory cannot be entered, or the build / benchmark fails.

case_root=$(readlink -f ../..)
CASE=${case_root##*/}
if [[ -z "$CASE" ]]; then
  echo "cannot derive the case name from $(pwd)" >&2
  exit 1
fi

EXAMPLE_DIR="/opt/util/examples/${CASE}"

# Stop here if the example directory is missing; otherwise the make
# invocations below would run in whatever directory we happen to be in.
pushd "$EXAMPLE_DIR" || exit 1

make clean

# Toolchain locations and link mode, presumably read by the example's
# Makefile - verify against /opt/util/examples.
export XRE_PATH=/opt/xre
export XBLAS_PATH=/opt/xhpc/xblas
export CXX=g++
export XTDK_PATH=/opt/xtdk/
export LINK_TYPE=dynamic

# Remember the build/run result so the cleanup below cannot mask a failure.
make && ./gemm
status=$?

make clean

popd
exit "$status"
17 changes: 17 additions & 0 deletions base/toolkits/computation-FP16/kunlunxin/R300p/main.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
#
# Builds the vendor example for this toolkit case with make, runs the
# resulting ./gemm binary, and cleans the build tree again.
#
# The case name is the last path component of the directory two levels above
# the current working directory, so the script has to be started from the
# directory that contains it.
# NOTE(review): assumes the harness cd's here before invoking - confirm.
#
# Exit status: non-zero when the case name cannot be derived, the example
# directory cannot be entered, or the build / benchmark fails.

case_root=$(readlink -f ../..)
CASE=${case_root##*/}
if [[ -z "$CASE" ]]; then
  echo "cannot derive the case name from $(pwd)" >&2
  exit 1
fi

EXAMPLE_DIR="/opt/util/examples/${CASE}"

# Stop here if the example directory is missing; otherwise the make
# invocations below would run in whatever directory we happen to be in.
pushd "$EXAMPLE_DIR" || exit 1

make clean

# Toolchain locations and link mode, presumably read by the example's
# Makefile - verify against /opt/util/examples.
export XRE_PATH=/opt/xre
export XBLAS_PATH=/opt/xhpc/xblas
export CXX=g++
export XTDK_PATH=/opt/xtdk/
export LINK_TYPE=dynamic

# Remember the build/run result so the cleanup below cannot mask a failure.
make && ./gemm
status=$?

make clean

popd
exit "$status"
17 changes: 17 additions & 0 deletions base/toolkits/computation-FP32/kunlunxin/R300p/main.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
#
# Builds the vendor example for this toolkit case with make, runs the
# resulting ./gemm binary, and cleans the build tree again.
#
# The case name is the last path component of the directory two levels above
# the current working directory, so the script has to be started from the
# directory that contains it.
# NOTE(review): assumes the harness cd's here before invoking - confirm.
#
# Exit status: non-zero when the case name cannot be derived, the example
# directory cannot be entered, or the build / benchmark fails.

case_root=$(readlink -f ../..)
CASE=${case_root##*/}
if [[ -z "$CASE" ]]; then
  echo "cannot derive the case name from $(pwd)" >&2
  exit 1
fi

EXAMPLE_DIR="/opt/util/examples/${CASE}"

# Stop here if the example directory is missing; otherwise the make
# invocations below would run in whatever directory we happen to be in.
pushd "$EXAMPLE_DIR" || exit 1

make clean

# Toolchain locations and link mode, presumably read by the example's
# Makefile - verify against /opt/util/examples.
export XRE_PATH=/opt/xre
export XBLAS_PATH=/opt/xhpc/xblas
export CXX=g++
export XTDK_PATH=/opt/xtdk/
export LINK_TYPE=dynamic

# Remember the build/run result so the cleanup below cannot mask a failure.
make && ./gemm
status=$?

make clean

popd
exit "$status"
17 changes: 17 additions & 0 deletions base/toolkits/computation-INT8/kunlunxin/R300p/main.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
#
# Builds the vendor example for this toolkit case with make, runs the
# resulting ./gemm binary, and cleans the build tree again.
#
# The case name is the last path component of the directory two levels above
# the current working directory, so the script has to be started from the
# directory that contains it.
# NOTE(review): assumes the harness cd's here before invoking - confirm.
#
# Exit status: non-zero when the case name cannot be derived, the example
# directory cannot be entered, or the build / benchmark fails.

case_root=$(readlink -f ../..)
CASE=${case_root##*/}
if [[ -z "$CASE" ]]; then
  echo "cannot derive the case name from $(pwd)" >&2
  exit 1
fi

EXAMPLE_DIR="/opt/util/examples/${CASE}"

# Stop here if the example directory is missing; otherwise the make
# invocations below would run in whatever directory we happen to be in.
pushd "$EXAMPLE_DIR" || exit 1

make clean

# Toolchain locations and link mode, presumably read by the example's
# Makefile - verify against /opt/util/examples.
export XRE_PATH=/opt/xre
export XBLAS_PATH=/opt/xhpc/xblas
export CXX=g++
export XTDK_PATH=/opt/xtdk/
export LINK_TYPE=dynamic

# Remember the build/run result so the cleanup below cannot mask a failure.
make && ./gemm
status=$?

make clean

popd
exit "$status"
17 changes: 17 additions & 0 deletions base/toolkits/computation-TF32/kunlunxin/R300p/main.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
#
# Builds the vendor example for this toolkit case with make, runs the
# resulting ./gemm binary, and cleans the build tree again.
#
# The case name is the last path component of the directory two levels above
# the current working directory, so the script has to be started from the
# directory that contains it.
# NOTE(review): assumes the harness cd's here before invoking - confirm.
#
# Exit status: non-zero when the case name cannot be derived, the example
# directory cannot be entered, or the build / benchmark fails.

case_root=$(readlink -f ../..)
CASE=${case_root##*/}
if [[ -z "$CASE" ]]; then
  echo "cannot derive the case name from $(pwd)" >&2
  exit 1
fi

EXAMPLE_DIR="/opt/util/examples/${CASE}"

# Stop here if the example directory is missing; otherwise the make
# invocations below would run in whatever directory we happen to be in.
pushd "$EXAMPLE_DIR" || exit 1

make clean

# Toolchain locations and link mode, presumably read by the example's
# Makefile - verify against /opt/util/examples.
export XRE_PATH=/opt/xre
export XBLAS_PATH=/opt/xhpc/xblas
export CXX=g++
export XTDK_PATH=/opt/xtdk/
export LINK_TYPE=dynamic

# Remember the build/run result so the cleanup below cannot mask a failure.
make && ./gemm
status=$?

make clean

popd
exit "$status"
39 changes: 39 additions & 0 deletions base/toolkits/interconnect-MPI_interserver/kunlunxin/R300p/main.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/bin/bash
#
# Launches the XCCL all_reduce perf tool through mpirun on every host listed
# in configs/host.yaml (8 ranks per host) and prints the last field of the
# tool's final output line as the bandwidth result.
#
# host.yaml is located through a path relative to the current working
# directory, so the script has to be started from the directory that
# contains it.
#
# Environment:
#   FLAGPERF_LAUNCH_HOST  hostname of the node that runs mpirun
#                         (default: p-perf-kunlun-01)

set -x

/etc/init.d/ssh start
/etc/init.d/ssh status
sleep 10

HOST_YAML="../../../../configs/host.yaml"

# Turn the uncommented HOSTS: entry into "ip:8,ip:8,...".
# -a is spelled out because -F only implies autosplit on perl >= 5.20.
hosts=$(grep -Ev '^[[:space:]]*#' "$HOST_YAML" \
  | grep HOSTS: \
  | cut -d: -f2 \
  | perl -F, -lane '/(\d+\.\d+\.\d+\.\d+)/ && push @h,$1.":8" foreach @F; print join(",", @h)')

TOOL=all_reduce
LOG=_${TOOL}.log.$$
PERF=/opt/xccl/perf/${TOOL}

# Remove the scratch log on every exit path, not only after a full run.
trap 'rm -f -- "$LOG"' EXIT

# FIXME: the launch node is still selected by hostname; the name can be
# overridden through FLAGPERF_LAUNCH_HOST, but a graceful impl. is needed.
LAUNCH_HOST=${FLAGPERF_LAUNCH_HOST:-p-perf-kunlun-01}
if [[ "$HOSTNAME" != "$LAUNCH_HOST" ]]; then
  echo "launch mpirun only on first node, exiting."
  exit 0
fi

# An empty list would otherwise be counted as one host and start 8 ranks
# against no hosts at all.
if [[ -z "$hosts" ]]; then
  echo "no host addresses found in ${HOST_YAML}" >&2
  exit 1
fi

# Number of entries = number of commas + 1; 8 ranks per entry.
separators=${hosts//[^,]/}
n=$(( (${#separators} + 1) * 8 ))

mpirun -hosts "${hosts}" -n "$n" "$PERF" \
  --nxpus "$n" \
  --warmup_iters 20 \
  --iters 2000 \
  --minbytes 256m \
  --maxbytes 256m \
  --op_type sum \
  --data_type float \
  -c 0 | tee "$LOG"

# The reported figure is the last field of the last output line.
busbw=$(tail -n 1 "$LOG" | awk '{print $NF}')

# NOTE(review): the label reads MPI_intraserver although this script lives
# under interconnect-MPI_interserver, and the value is not doubled. Both are
# kept unchanged for whatever consumes this line - confirm the intent.
echo "[FlagPerf Result]interconnect-MPI_intraserver-bandwidth=${busbw} GB/s"
22 changes: 22 additions & 0 deletions base/toolkits/interconnect-MPI_intraserver/kunlunxin/R300p/main.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
#
# Runs the XCCL all_reduce perf tool with --nxpus 8 and prints twice the
# last field of the tool's final output line as the bandwidth result.
#
# Exit status: non-zero when no numeric figure can be read from the tool's
# output.

TOOL=all_reduce
LOG=_${TOOL}.log.$$
PERF=/opt/xccl/perf/${TOOL}

# Remove the scratch log on every exit path, not only after a full run.
trap 'rm -f -- "$LOG"' EXIT

"$PERF" \
  --nxpus 8 \
  --warmup_iters 20 \
  --iters 20000 \
  --minbytes 128m \
  --maxbytes 128m \
  --op_type sum \
  --data_type float \
  -c 0 | tee "$LOG"

# The figure of interest is the last field of the last output line.
busbw=$(tail -n 1 "$LOG" | awk '{print $NF}')

# Without this guard an empty value would be doubled as float() == 0.0 and
# published as a genuine 0.0 GB/s measurement.
if [[ -z "$busbw" ]]; then
  echo "no bandwidth figure found in the output of ${PERF}" >&2
  exit 1
fi

# Pass the value as an argument instead of pasting it into Python source,
# so unexpected tool output cannot be executed as code.
busbw_bi=$(python3 -c 'import sys; print(float(sys.argv[1]) * 2)' "$busbw") || exit 1

echo "[FlagPerf Result]interconnect-MPI_intraserver-bandwidth=${busbw_bi} GB/s"
39 changes: 39 additions & 0 deletions base/toolkits/interconnect-P2P_interserver/kunlunxin/R300p/main.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/bin/bash
#
# Launches the XCCL sendrecv perf tool through mpirun with 2 ranks (one slot
# per host listed in configs/host.yaml) and prints the last field of the
# tool's final output line as the bandwidth result.
#
# host.yaml is located through a path relative to the current working
# directory, so the script has to be started from the directory that
# contains it.
#
# Environment:
#   FLAGPERF_LAUNCH_HOST  hostname of the node that runs mpirun
#                         (default: p-perf-kunlun-01)

set -x

/etc/init.d/ssh start
/etc/init.d/ssh status
sleep 10

HOST_YAML="../../../../configs/host.yaml"

# Turn the uncommented HOSTS: entry into "ip:1,ip:1,...".
# -a is spelled out because -F only implies autosplit on perl >= 5.20.
hosts=$(grep -Ev '^[[:space:]]*#' "$HOST_YAML" \
  | grep HOSTS: \
  | cut -d: -f2 \
  | perl -F, -lane '/(\d+\.\d+\.\d+\.\d+)/ && push @h,$1.":1" foreach @F; print join(",", @h)')

TOOL=sendrecv
LOG=_${TOOL}.log.$$
PERF=/opt/xccl/perf/${TOOL}

# Remove the scratch log on every exit path, not only after a full run.
trap 'rm -f -- "$LOG"' EXIT

# FIXME: the launch node is still selected by hostname; the name can be
# overridden through FLAGPERF_LAUNCH_HOST, but a graceful impl. is needed.
LAUNCH_HOST=${FLAGPERF_LAUNCH_HOST:-p-perf-kunlun-01}
if [[ "$HOSTNAME" != "$LAUNCH_HOST" ]]; then
  echo "launch mpirun only on first node, exiting."
  exit 0
fi

if [[ -z "$hosts" ]]; then
  echo "no host addresses found in ${HOST_YAML}" >&2
  exit 1
fi

# Keep a space before every line continuation: a backslash directly after
# an argument glues it to the first word of the next line.
mpirun -hosts "${hosts}" -n 2 "$PERF" \
  --nxpus 2 \
  --warmup_iters 20 \
  --iters 2000 \
  --minbytes 256m \
  --maxbytes 256m \
  --op_type sum \
  --data_type float \
  -c 0 | tee "$LOG"

# The reported figure is the last field of the last output line.
busbw=$(tail -n 1 "$LOG" | awk '{print $NF}')

# NOTE(review): the label reads MPI_intraserver although this script lives
# under interconnect-P2P_interserver, and the value is not doubled. Both are
# kept unchanged for whatever consumes this line - confirm the intent.
echo "[FlagPerf Result]interconnect-MPI_intraserver-bandwidth=${busbw} GB/s"
22 changes: 22 additions & 0 deletions base/toolkits/interconnect-P2P_intraserver/kunlunxin/R300p/main.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
#
# Runs the XCCL sendrecv perf tool with --nxpus 8 and prints twice the last
# field of the tool's final output line as the bandwidth result.
#
# Exit status: non-zero when no numeric figure can be read from the tool's
# output.

TOOL=sendrecv
LOG=_${TOOL}.log.$$
PERF=/opt/xccl/perf/${TOOL}

# Remove the scratch log on every exit path, not only after a full run.
trap 'rm -f -- "$LOG"' EXIT

"$PERF" \
  --nxpus 8 \
  --warmup_iters 20 \
  --iters 20000 \
  --minbytes 128m \
  --maxbytes 128m \
  --op_type sum \
  --data_type float \
  -c 0 | tee "$LOG"

# The figure of interest is the last field of the last output line.
busbw=$(tail -n 1 "$LOG" | awk '{print $NF}')

# Without this guard an empty value would be doubled as float() == 0.0 and
# published as a genuine 0.0 GB/s measurement.
if [[ -z "$busbw" ]]; then
  echo "no bandwidth figure found in the output of ${PERF}" >&2
  exit 1
fi

# Pass the value as an argument instead of pasting it into Python source,
# so unexpected tool output cannot be executed as code.
busbw_bi=$(python3 -c 'import sys; print(float(sys.argv[1]) * 2)' "$busbw") || exit 1

# NOTE(review): the label reads MPI_intraserver although this script lives
# under interconnect-P2P_intraserver; kept unchanged for whatever consumes
# this line - confirm the intent.
echo "[FlagPerf Result]interconnect-MPI_intraserver-bandwidth=${busbw_bi} GB/s"
16 changes: 16 additions & 0 deletions base/toolkits/interconnect-h2d/kunlunxin/R300p/main.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Runs the XRE test_dma tool against device 0 with --loop 100 and a size
# argument of 1024^3, then reports the figure found in its HOST_TO_DEVICE
# section.

TOOL=test_dma
LOG="_${TOOL}.log.$$"
PERF="/opt/xre/tools/${TOOL}"
DEV=0
SIZE=$((1024 * 1024 * 1024))

# Show the tool's output live while keeping a copy for parsing.
"${PERF}" --loop 100 "${DEV}" "${SIZE}" | tee "${LOG}"

# From the HOST_TO_DEVICE match and its four following lines, take the last
# line, keep what follows the first colon and strip every space.
busbw=$(grep -A 4 HOST_TO_DEVICE "${LOG}" \
  | tail -n 1 \
  | cut -d: -f2 \
  | tr -d ' ')

echo "[FlagPerf Result] interconnect-h2d bandwidth=$busbw GB/s"
rm -f "${LOG}"
17 changes: 17 additions & 0 deletions base/toolkits/main_memory-bandwidth/kunlunxin/R300p/main.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
#
# Builds the vendor example for this toolkit case with make, runs the
# resulting ./bandwidth binary, and cleans the build tree again.
#
# The case name is the last path component of the directory two levels above
# the current working directory, so the script has to be started from the
# directory that contains it.
# NOTE(review): assumes the harness cd's here before invoking - confirm.
#
# Exit status: non-zero when the case name cannot be derived, the example
# directory cannot be entered, or the build / benchmark fails.

case_root=$(readlink -f ../..)
CASE=${case_root##*/}
if [[ -z "$CASE" ]]; then
  echo "cannot derive the case name from $(pwd)" >&2
  exit 1
fi

EXAMPLE_DIR="/opt/util/examples/${CASE}"

# Stop here if the example directory is missing; otherwise the make
# invocations below would run in whatever directory we happen to be in.
pushd "$EXAMPLE_DIR" || exit 1

make clean

# Toolchain locations and link mode, presumably read by the example's
# Makefile - verify against /opt/util/examples.
export XRE_PATH=/opt/xre
export XDNN_PATH=/opt/xhpc/xdnn
export CXX=g++
export XTDK_PATH=/opt/xtdk/
export LINK_TYPE=dynamic

# Remember the build/run result so the cleanup below cannot mask a failure.
make && ./bandwidth
status=$?

make clean

popd
exit "$status"
8 changes: 8 additions & 0 deletions base/toolkits/main_memory-capacity/kunlunxin/R300p/main.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
#
# Reports the memory capacity as the 19th whitespace-separated field of the
# first line printed by `xpu-smi -m`.

TOOL=xpu-smi
# LOG is assigned but not referenced anywhere else in this script.
LOG="_${TOOL}.log.$$"
PERF="/opt/xre/bin/${TOOL}"

# Only the first output line is inspected.
mem=$("${PERF}" -m | awk 'NR == 1 { print $19 }')

echo "[FlagPerf Result] main_memory-capacity=$mem MiB"
9 changes: 9 additions & 0 deletions base/vendors/kunlunxin/pytorch_2.0/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
FROM klx-flagperf-base:latest

# Use the TUNA PyPI mirror for every pip install that follows.
RUN /bin/bash -c "pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple"

# Record the kernel of the build environment in the build log.
RUN /bin/bash -c "uname -a"

# The earlier `alias python3` step was removed: bash -c received only the
# word "alias" as its command, so nothing was defined, and an alias would
# not survive into later layers anyway.

# Put the conda environment's interpreter and tools first on PATH.
ENV PATH=/root/miniconda/envs/python38_torch201_cuda/bin:$PATH

# Python packages installed on top of the base image, in a single layer.
RUN pip3 install loguru schedule munch
13 changes: 13 additions & 0 deletions base/vendors/kunlunxin/pytorch_2.0/pytorch_2.0_install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
#
# Environment setup: installs a few Python packages, starts the ssh service,
# runs the xpytorch installer from /opt/xpytorch and finishes with two
# Python smoke checks.

set -x

# Runs the given command with CUDART_DUMMY_REGISTER=1 in its environment.
with_dummy_cudart() {
  CUDART_DUMMY_REGISTER=1 "$@"
}

pip install pytest loguru schedule

/etc/init.d/ssh start

# xpytorch install
# The installer only runs when the directory change succeeded; the working
# directory stays at /opt/xpytorch for the checks below.
if cd /opt/xpytorch; then
  bash xpytorch-cp38-torch201-ubuntu2004-x64.run
fi

with_dummy_cudart python -m torch_xmlir --doctor
with_dummy_cudart python -c "import torch; print(torch.rand(512, 128).cuda())"

0 comments on commit df9b99e

Please sign in to comment.