Skip to content

Commit 8ce20f6

Browse files
committed
CANN Backend support
1 parent c95fd4e commit 8ce20f6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+4282
-153
lines changed

CMakeLists.txt

+107
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ option(WITH_DNNL "Compile with DNNL backend" OFF)
1212
option(WITH_ACCELERATE "Compile with Accelerate backend" OFF)
1313
option(WITH_OPENBLAS "Compile with OpenBLAS backend" OFF)
1414
option(WITH_RUY "Compile with Ruy backend" OFF)
15+
option(WITH_CANN "Compile with CANN backend" OFF)
1516
option(WITH_CUDA "Compile with CUDA backend" OFF)
1617
option(WITH_CUDNN "Compile with cuDNN backend" OFF)
1718
option(CUDA_DYNAMIC_LOADING "Dynamically load CUDA libraries at runtime" OFF)
@@ -21,6 +22,12 @@ option(BUILD_CLI "Compile the clients" ON)
2122
option(BUILD_TESTS "Compile the tests" OFF)
2223
option(BUILD_SHARED_LIBS "Build shared libraries" ON)
2324

25+
if(WITH_CUDA OR WITH_CUDNN)
26+
if(WITH_CANN)
27+
message( FATAL_ERROR "CANN backend cannot be combined with CUDA or CUDNN!" )
28+
endif ()
29+
endif ()
30+
2431
if(ENABLE_PROFILING)
2532
message(STATUS "Enable profiling support")
2633
add_definitions(-DCT2_ENABLE_PROFILING)
@@ -525,6 +532,105 @@ if (WITH_CUDA)
525532
)
526533
elseif(WITH_CUDNN)
527534
message(FATAL_ERROR "WITH_CUDNN=ON requires WITH_CUDA=ON")
535+
elseif(WITH_CANN)
536+
add_definitions(-DCT2_WITH_CANN)
537+
538+
message(STATUS "ASCEND_TOOLKIT_HOME: $ENV{ASCEND_TOOLKIT_HOME}")
539+
message(STATUS "LD_LIBRARY_PATH: $ENV{LD_LIBRARY_PATH}")
540+
message(STATUS "PYTHONPATH: $ENV{PYTHONPATH}")
541+
message(STATUS "ASCEND_AICPU_PATH: $ENV{ASCEND_AICPU_PATH}")
542+
message(STATUS "ASCEND_OPP_PATH: $ENV{ASCEND_OPP_PATH}")
543+
message(STATUS "TOOLCHAIN_HOME: $ENV{TOOLCHAIN_HOME}")
544+
message(STATUS "ASCEND_HOME_PATH: $ENV{ASCEND_HOME_PATH}")
545+
message(STATUS "PATH: $ENV{PATH}")
546+
547+
if(DEFINED ENV{ASCEND_CUSTOM_PATH})
548+
set(ASCEND_DIR $ENV{ASCEND_CUSTOM_PATH})
549+
else()
550+
set(ASCEND_DIR /usr/local/Ascend)
551+
endif()
552+
553+
message(STATUS "ASCEND_DIR: ${ASCEND_DIR}")
554+
555+
set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64)
556+
set(ASCEND_DRIVER_COMMON_DIR ${ASCEND_DIR}/driver/lib64/common)
557+
set(ASCEND_DRIVER_SHARE_DIR ${ASCEND_DIR}/driver/lib64/share)
558+
set(ASCEND_RUNTIME_DIR ${ASCEND_DIR}/fwkacllib/lib64)
559+
set(ASCEND_ATC_DIR ${ASCEND_DIR}/atc/lib64)
560+
set(ASCEND_ACL_DIR ${ASCEND_DIR}/acllib/lib64)
561+
set(STATIC_ACL_LIB ${ASCEND_ACL_DIR})
562+
563+
set(ASCEND_MS_RUNTIME_PATH ${ASCEND_RUNTIME_DIR} ${ASCEND_ACL_DIR} ${ASCEND_ATC_DIR})
564+
set(ASCEND_MS_DRIVER_PATH ${ASCEND_DRIVER_DIR} ${ASCEND_DRIVER_COMMON_DIR})
565+
set(ATLAS_RUNTIME_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64)
566+
set(ATLAS_RUNTIME_INC_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/include)
567+
set(ATLAS_ACL_DIR ${ASCEND_DIR}/ascend-toolkit/latest/acllib/lib64)
568+
set(ATLAS_ATC_DIR ${ASCEND_DIR}/ascend-toolkit/latest/atc/lib64)
569+
set(ATLAS_MS_RUNTIME_PATH ${ATLAS_RUNTIME_DIR} ${ATLAS_ACL_DIR} ${ATLAS_ATC_DIR})
570+
571+
set(atlas_graph_lib ${ATLAS_RUNTIME_DIR}/libgraph.so)
572+
set(atlas_ge_runner_lib ${ATLAS_RUNTIME_DIR}/libge_runner.so)
573+
set(atlas_acl_lib ${ATLAS_RUNTIME_DIR}/libascendcl.so)
574+
INCLUDE_DIRECTORIES(${ATLAS_RUNTIME_INC_DIR})
575+
576+
ADD_LIBRARY(ascend_ge SHARED IMPORTED GLOBAL)
577+
SET_PROPERTY(TARGET ascend_ge PROPERTY IMPORTED_LOCATION ${atlas_ge_runner_lib})
578+
579+
ADD_LIBRARY(ascend_graph SHARED IMPORTED GLOBAL)
580+
SET_PROPERTY(TARGET ascend_graph PROPERTY IMPORTED_LOCATION ${atlas_graph_lib})
581+
582+
ADD_LIBRARY(atlas_acl SHARED IMPORTED GLOBAL)
583+
SET_PROPERTY(TARGET atlas_acl PROPERTY IMPORTED_LOCATION ${atlas_acl_lib})
584+
585+
set(extern_ascend ascend_ge ascend_graph atlas_acl CACHE INTERNAL "acllib runtime libs")
586+
587+
set(ASCEND_CL_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64)
588+
589+
set(ascend_hccl_lib ${ASCEND_CL_DIR}/libhccl.so)
590+
set(ascendcl_lib ${ASCEND_CL_DIR}/libascendcl.so)
591+
set(acl_op_compiler_lib ${ASCEND_CL_DIR}/libacl_op_compiler.so)
592+
set(FWKACLLIB_INC_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/include)
593+
set(ACLLIB_INC_DIR ${ASCEND_DIR}/ascend-toolkit/latest/acllib/include)
594+
595+
message(STATUS "FWKACLLIB_INC_DIR ${FWKACLLIB_INC_DIR}")
596+
message(STATUS "ASCEND_CL_DIR ${ASCEND_CL_DIR}")
597+
INCLUDE_DIRECTORIES(${FWKACLLIB_INC_DIR})
598+
INCLUDE_DIRECTORIES(${ACLLIB_INC_DIR})
599+
600+
ADD_LIBRARY(ascendcl SHARED IMPORTED GLOBAL)
601+
SET_PROPERTY(TARGET ascendcl PROPERTY IMPORTED_LOCATION ${ascendcl_lib})
602+
603+
ADD_LIBRARY(ascend_hccl SHARED IMPORTED GLOBAL)
604+
SET_PROPERTY(TARGET ascend_hccl PROPERTY IMPORTED_LOCATION ${ascend_hccl_lib})
605+
606+
ADD_LIBRARY(acl_op_compiler SHARED IMPORTED GLOBAL)
607+
SET_PROPERTY(TARGET acl_op_compiler PROPERTY IMPORTED_LOCATION ${acl_op_compiler_lib})
608+
609+
set(extern_ascend_cl ascendcl acl_op_compiler CACHE INTERNAL "acltoolkit libs")
610+
611+
list(APPEND SOURCES
612+
src/cann/allocator.cc
613+
src/cann/primitives.cc
614+
src/cann/utils.cc
615+
src/ops/topk_npu.cc
616+
src/ops/dequantize_npu.cc
617+
src/ops/gumbel_max_npu.cc
618+
src/ops/topp_mask_npu.cc
619+
src/ops/multinomial_npu.cc
620+
src/ops/gather_npu.cc
621+
src/ops/conv1d_npu.cc
622+
src/ops/concat_split_slide_npu.cc
623+
src/ops/alibi_add_npu.cc
624+
src/ops/softmax_npu.cc
625+
src/ops/tile_npu.cc
626+
src/ops/rms_norm_npu.cc
627+
src/ops/layer_norm_npu.cc
628+
src/ops/rotary_npu.cc
629+
src/ops/bias_add_npu.cc
630+
src/ops/mean_npu.cc
631+
src/ops/quantize_npu.cc)
632+
add_library(${PROJECT_NAME} ${SOURCES})
633+
list(APPEND LIBRARIES ${extern_ascend} ${extern_ascend_cl})
528634
else()
529635
add_library(${PROJECT_NAME} ${SOURCES})
530636
endif()
@@ -540,6 +646,7 @@ set_property(TARGET ${PROJECT_NAME} APPEND PROPERTY
540646
)
541647

542648
list(APPEND LIBRARIES ${CMAKE_DL_LIBS})
649+
543650
target_link_libraries(${PROJECT_NAME} PRIVATE ${LIBRARIES})
544651
target_include_directories(${PROJECT_NAME} BEFORE
545652
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> $<INSTALL_INTERFACE:include>

README.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@ The project is production-oriented and comes with [backward compatibility guaran
2525

2626
## Key features
2727

28-
* **Fast and efficient execution on CPU and GPU**<br/>The execution [is significantly faster and requires less resources](#benchmarks) than general-purpose deep learning frameworks on supported models and tasks thanks to many advanced optimizations: layer fusion, padding removal, batch reordering, in-place operations, caching mechanism, etc.
28+
* **Fast and efficient execution on CPU, GPU and NPU**<br/>The execution [is significantly faster and requires less resources](#benchmarks) than general-purpose deep learning frameworks on supported models and tasks thanks to many advanced optimizations: layer fusion, padding removal, batch reordering, in-place operations, caching mechanism, etc.
2929
* **Quantization and reduced precision**<br/>The model serialization and computation support weights with [reduced precision](https://opennmt.net/CTranslate2/quantization.html): 16-bit floating points (FP16), 16-bit brain floating points (BF16), 16-bit integers (INT16), and 8-bit integers (INT8).
3030
* **Multiple CPU architectures support**<br/>The project supports x86-64 and AArch64/ARM64 processors and integrates multiple backends that are optimized for these platforms: [Intel MKL](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl.html), [oneDNN](https://github.com/oneapi-src/oneDNN), [OpenBLAS](https://www.openblas.net/), [Ruy](https://github.com/google/ruy), and [Apple Accelerate](https://developer.apple.com/documentation/accelerate).
3131
* **Automatic CPU detection and code dispatch**<br/>One binary can include multiple backends (e.g. Intel MKL and oneDNN) and instruction set architectures (e.g. AVX, AVX2) that are automatically selected at runtime based on the CPU information.
32-
* **Parallel and asynchronous execution**<br/>Multiple batches can be processed in parallel and asynchronously using multiple GPUs or CPU cores.
33-
* **Dynamic memory usage**<br/>The memory usage changes dynamically depending on the request size while still meeting performance requirements thanks to caching allocators on both CPU and GPU.
32+
* **Parallel and asynchronous execution**<br/>Multiple batches can be processed in parallel and asynchronously using multiple GPUs, NPUs or CPU cores.
33+
* **Dynamic memory usage**<br/>The memory usage changes dynamically depending on the request size while still meeting performance requirements thanks to caching allocators on CPU, GPU and NPU.
3434
* **Lightweight on disk**<br/>Quantization can make the models 4 times smaller on disk with minimal accuracy loss.
3535
* **Simple integration**<br/>The project has few dependencies and exposes simple APIs in [Python](https://opennmt.net/CTranslate2/python/overview.html) and C++ to cover most integration needs.
3636
* **Configurable and interactive decoding**<br/>[Advanced decoding features](https://opennmt.net/CTranslate2/decoding.html) allow autocompleting a partial sequence and returning alternatives at a specific location in the sequence.

cli/translator.cc

+7-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ int main(int argc, char* argv[]) {
3030
cxxopts::value<size_t>()->default_value("1"))
3131
("intra_threads", "Number of computation threads (set to 0 to use the default value).",
3232
cxxopts::value<size_t>()->default_value("0"))
33-
("device", "Device to use (can be cpu, cuda, auto).",
33+
("device", "Device to use (can be cpu, cuda, cann, auto).",
3434
cxxopts::value<std::string>()->default_value("cpu"))
3535
("device_index", "Comma-separated list of device IDs to use.",
3636
cxxopts::value<std::vector<int>>()->default_value("0"))
@@ -44,6 +44,8 @@ int main(int argc, char* argv[]) {
4444
cxxopts::value<std::string>()->default_value("default"))
4545
("cuda_compute_type", "Computation type on CUDA devices (overrides compute_type)",
4646
cxxopts::value<std::string>())
47+
("cann_compute_type", "Computation type on CANN devices (overrides compute_type)",
48+
cxxopts::value<std::string>())
4749
("cpu_compute_type", "Computation type on CPU devices (overrides compute_type)",
4850
cxxopts::value<std::string>())
4951
;
@@ -139,6 +141,10 @@ int main(int argc, char* argv[]) {
139141
if (args.count("cuda_compute_type"))
140142
compute_type = ctranslate2::str_to_compute_type(args["cuda_compute_type"].as<std::string>());
141143
break;
144+
case ctranslate2::Device::CANN:
145+
if (args.count("cann_compute_type"))
146+
compute_type = ctranslate2::str_to_compute_type(args["cann_compute_type"].as<std::string>());
147+
break;
142148
};
143149

144150
ctranslate2::ReplicaPoolConfig pool_config;

docker/cann/Dockerfile_cann

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Extend/build an image for CANN support
2+
# Ascend-cann-toolkit_<VERSION>.run is expected to exist in <project_root>/ascend_install_files
3+
4+
# preferably arm64
5+
FROM ubuntu:20.04
6+
7+
RUN DEBIAN_FRONTEND="noninteractive" apt update && \
8+
apt install --no-install-recommends net-tools -y && \
9+
apt install --no-install-recommends libsqlite3-dev -y && \
10+
apt install --no-install-recommends zlib1g -y && \
11+
apt install --no-install-recommends openssl -y
12+
13+
RUN DEBIAN_FRONTEND="noninteractive" apt update && \
14+
apt install --no-install-recommends ca-certificates -y && \
15+
apt install --no-install-recommends bc wget -y && \
16+
apt install --no-install-recommends curl gdb cmake gcc make g++ pkg-config unzip -y && \
17+
apt install --no-install-recommends libblas3 liblapack3 gfortran vim -y && \
18+
apt install --no-install-recommends liblapack-dev libblas-dev libhdf5-dev libffi-dev -y && \
19+
apt install --no-install-recommends libssl-dev zlib1g-dev xz-utils cython3 python3-h5py -y && \
20+
apt install --no-install-recommends libopenblas-dev libgmpxx4ldbl liblzma-dev -y && \
21+
apt install --no-install-recommends pciutils -y
22+
23+
24+
RUN DEBIAN_FRONTEND="noninteractive" apt update && \
25+
apt-get install -y --no-install-recommends \
26+
python3-dev \
27+
python3-pip \
28+
wget
29+
30+
RUN python3 -m pip --no-cache-dir install numpy && \
31+
python3 -m pip --no-cache-dir install decorator && \
32+
python3 -m pip --no-cache-dir install sympy && \
33+
python3 -m pip --no-cache-dir install cffi && \
34+
python3 -m pip --no-cache-dir install pyyaml && \
35+
python3 -m pip --no-cache-dir install pathlib2 && \
36+
python3 -m pip --no-cache-dir install protobuf && \
37+
python3 -m pip --no-cache-dir install scipy
38+
39+
RUN python3 -m pip --no-cache-dir install psutil && \
40+
python3 -m pip --no-cache-dir install requests absl-py
41+
42+
RUN python3 -m pip --no-cache-dir install attrs
43+
44+
# cleanup actions
45+
RUN rm -rf /root/.cache/pip
46+
RUN DEBIAN_FRONTEND="noninteractive" apt clean && rm -rf /var/lib/apt/lists/*
47+
RUN DEBIAN_FRONTEND="noninteractive" apt autoremove && apt autoclean
48+
49+
# Install Ascend toolkit
50+
COPY ascend_install_files ascend_install_files
51+
RUN chmod +x ascend_install_files/Ascend-cann-toolkit_7.0.RC1.alpha001_linux-aarch64.run && \
52+
ascend_install_files/Ascend-cann-toolkit_7.0.RC1.alpha001_linux-aarch64.run --install && \
53+
rm -f ascend_install_files/Ascend-cann-toolkit_7.0.RC1.alpha001_linux-aarch64.run
54+
55+
# Add usergroup & user
56+
RUN groupadd HwHiAiUser && useradd -g HwHiAiUser -m -d /home/HwHiAiUser HwHiAiUser
57+
58+
# This is copied from /usr/local/Ascend/ascend-toolkit/set_env.sh of the respective ascend-toolkit version
59+
ENV LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64:/usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/driver:$LD_LIBRARY_PATH
60+
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
61+
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:$LD_LIBRARY_PATH
62+
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:$PYTHONPATH
63+
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:$PATH
64+
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
65+
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
66+
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
67+
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
68+
69+
# ENV LD_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/hdf5/serial:$LD_LIBRARY_PATH
70+
# ENV HCCL_CONNECT_TIMEOUT=7200
71+
# ENV HCCL_WHITELIST_DISABLE=1
72+
# ENV HCCL_SECURITY_MODE=1
73+
74+
ENV ASCEND_GLOBAL_LOG_LEVEL=3
75+
76+
# Set env vars again in case of interactive ssh connection (ascend-toolkit assumed to be already installed)
77+
RUN cp /usr/local/Ascend/ascend-toolkit/set_env.sh /etc/profile.d/
78+
RUN chmod 644 /etc/profile.d/set_env.sh

docker/cann/run_container_cann.sh

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#!/bin/bash
2+
3+
# build image that will host CANN environment
4+
cd ../../
5+
docker build -t ctranslate2-aarch64 -f docker/cann/Dockerfile_cann --platform linux/arm64 .
6+
7+
# run the respective container
8+
docker run \
9+
-d --cap-add sys_ptrace \
10+
--pids-limit 409600 \
11+
--privileged --shm-size=128G \
12+
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
13+
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
14+
-v /usr/local/dcmi:/usr/local/dcmi \
15+
--name ctranslate2-aarch64 <container>

docs/hardware_support.md

+7
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,10 @@ See the [environment variables](environment_variables.md) `CT2_USE_MKL` and `CT2
2020
* NVIDIA GPUs with a Compute Capability greater or equal to 3.5
2121

2222
The driver requirement depends on the CUDA version. See the [CUDA Compatibility guide](https://docs.nvidia.com/deploy/cuda-compatibility/index.html) for more information.
23+
24+
## NPU
25+
26+
* AArch64/ARM64 processors
27+
* Ascend NPU AI Processor greater than or equal to 910A
28+
29+
`CANN` version greater than or equal to `7.0.RC1.alpha001` (depends on NPU model). See the [CANN documentation](https://support.huawei.com/enterprise/en/ascend-computing/cann-pid-251168373) for more information.

examples/cann/CMakeLists.txt

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
cmake_minimum_required(VERSION 3.7)
2+
project(cann)
3+
set(CMAKE_CXX_STANDARD 17)
4+
set(CMAKE_BUILD_TYPE Release)
5+
find_package(Threads)
6+
add_executable(cann_run main.cc)
7+
target_link_libraries(cann_run PRIVATE
8+
${CMAKE_THREAD_LIBS_INIT}
9+
ctranslate2
10+
)

examples/cann/README.md

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# CANN example query
2+
This example demonstrates a translation query employing `CANN` using the English-German Transformer model trained with OpenNMT-py as in [CTranslate2 documentation](https://opennmt.net/CTranslate2/quickstart.html).
3+
4+
## Environment setup
5+
- Create the environment: `docker/cann/Dockerfile_cann`
6+
- Run the container: `docker/cann/run_container_cann.sh`
7+
8+
## Download model
9+
```bash
10+
wget https://s3.amazonaws.com/opennmt-models/transformer-ende-wmt-pyOnmt.tar.gz
11+
tar xf transformer-ende-wmt-pyOnmt.tar.gz
12+
```
13+
14+
## Build executable
15+
Run `examples/cann/build_run.sh`
16+
17+
### Expected output
18+
19+
```
20+
current path: "<current path>"
21+
input data path: "<input data path>"
22+
[<timestamp>] [ctranslate2] [thread 49835] [info] CPU: ARM (NEON=true)
23+
[<timestamp>] [ctranslate2] [thread 49835] [info] - Selected ISA: NEON
24+
[<timestamp>] [ctranslate2] [thread 49835] [info] - Use Intel MKL: false
25+
[<timestamp>] [ctranslate2] [thread 49835] [info] - SGEMM backend: OpenBLAS (packed: false)
26+
[<timestamp>] [ctranslate2] [thread 49835] [info] - GEMM_S16 backend: none (packed: false)
27+
[<timestamp>] [ctranslate2] [thread 49835] [info] - GEMM_S8 backend: Ruy (packed: false, u8s8 preferred: false)
28+
[<timestamp>] [ctranslate2] [thread 49835] [info] NPU:
29+
[<timestamp>] [ctranslate2] [thread 49835] [info] - Number of NPU cores: 8
30+
[<timestamp>] [ctranslate2] [thread 49835] [info] - aclrtRunMode: ACL_HOST
31+
[<timestamp>] [ctranslate2] [thread 49835] [info] Loaded model <path> on device cann:0
32+
[<timestamp>] [ctranslate2] [thread 49835] [info] - Binary version: 6
33+
[<timestamp>] [ctranslate2] [thread 49835] [info] - Model specification revision: 7
34+
[<timestamp>] [ctranslate2] [thread 49835] [info] - Selected compute type: float32
35+
input data:
36+
▁H ello ▁world !
37+
Start: Warmup examples
38+
output:
39+
▁Hallo ▁Welt !
40+
input data:
41+
▁H ello ▁world !
42+
Start: Query examples
43+
output:
44+
▁Hallo ▁Welt !
45+
```

examples/cann/build_run.sh

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#!/bin/bash
2+
3+
# execute from project root
4+
5+
# first build ct2lib
6+
rm -rf build-release/
7+
mkdir build-release && cd build-release || exit
8+
9+
cmake -DWITH_CANN=ON -DCMAKE_BUILD_TYPE=Release -DBUILD_CLI=OFF -DWITH_MKL=OFF -DOPENMP_RUNTIME=COMP -DCMAKE_PREFIX_PATH="/opt/OpenBLAS" -DWITH_OPENBLAS=ON -DWITH_RUY=ON ..
10+
11+
make -j"$(nproc)"
12+
13+
rm CMakeCache.txt
14+
15+
# then build cann_run
16+
cmake -DCMAKE_BUILD_TYPE=Release ../examples/cann/
17+
18+
make -j"$(nproc)"
19+
# ./cann_run <ende_ctranslate2_path>

0 commit comments

Comments
 (0)