From b7b7b2bebb9b8a503bf706c59a08f877944e3b31 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 31 Mar 2022 12:46:22 +0200 Subject: [PATCH] [docker/train] Build binary artifacts in the image, improve layer caching, prepare for multi-arch build --- .github/workflows/build-and-test.yml | 90 +++++++----- Dockerfile.build | 184 ------------------------- Dockerfile.train | 99 +++++++------ Dockerfile.train.dockerignore | 6 +- Makefile | 8 -- ci_scripts/build-utils.sh | 4 +- ci_scripts/gen-scorer-package-build.sh | 22 +++ ci_scripts/tf-vars.sh | 24 +++- native_client/ctcdecode/Makefile | 2 +- native_client/definitions.mk | 19 ++- setup.py | 2 +- 11 files changed, 164 insertions(+), 296 deletions(-) delete mode 100644 Dockerfile.build delete mode 100644 Makefile create mode 100755 ci_scripts/gen-scorer-package-build.sh diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 8656c47e0..db9fff4fd 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -1164,44 +1164,68 @@ jobs: name: '' # use filename path: "*.zip" release-tag: ${{ needs.create-release.outputs.release-tag }} - docker-build: - name: "Build Dockerfile.build image" + docker-train-build: + name: "Build Docker training images" runs-on: ubuntu-20.04 + strategy: + matrix: + arch: ["amd64"] steps: - uses: actions/checkout@v2 with: fetch-depth: 0 submodules: 'recursive' + - name: Log in to the Container registry + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} - name: Check VERSION matches Git tag and compute Docker tag id: compute-tag run: | - if [[ "${{ startsWith(github.ref, 'refs/tags/') }}" != "true" ]]; then + set -ex + base="ghcr.io/coqui-ai/stt-train" + if [[ "${{ github.ref }}" = "refs/heads/main" ]]; then + # Push to main + tags="${base}:main_${{ matrix.arch }},${base}:${{ github.sha }}_${{ matrix.arch }}" + elif [[ "${{ 
startsWith(github.ref, 'refs/tags/') }}" != "true" ]]; then # PR build - echo "::set-output name=tag::dev" + tags="${base}:dev_${{ matrix.arch }}" else VERSION="v$(cat VERSION)" if [[ "${{ github.ref }}" != "refs/tags/${VERSION}" ]]; then echo "Pushed tag does not match VERSION file. Aborting push." exit 1 fi - echo "::set-output name=tag::${VERSION}" + tags="${base}:${VERSION}_${{ matrix.arch }},${base}:latest_${{ matrix.arch }},${base}:${{ github.sha }}_${{ matrix.arch }}" fi - - name: Build - run: | - DOCKER_TAG="${{ steps.compute-tag.outputs.tag }}" - docker build -f Dockerfile.build . -t ghcr.io/coqui-ai/stt-build:latest -t "ghcr.io/coqui-ai/stt-build:${DOCKER_TAG}" - docker-publish: - name: "Build and publish Docker training image to GHCR" + echo "::set-output name=tags::${tags}" + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v1 + - name: Generate version info for build + run: | + make -C native_client/ctcdecode workspace_status.cc + - name: Build and push + uses: docker/build-push-action@v2 + with: + context: . 
+ file: Dockerfile.train + platforms: linux/${{ matrix.arch }} + push: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/')) }} + tags: ${{ steps.compute-tag.outputs.tags }} + docker-train-manifest: + name: "Publish Docker training manifest to GHCR" runs-on: ubuntu-20.04 - needs: [upload-nc-release-assets] - if: always() + needs: [docker-train-build] + if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/')) steps: - uses: actions/checkout@v2 - with: - fetch-depth: 0 - submodules: 'recursive' - name: Log in to the Container registry - uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 + uses: docker/login-action@v1 with: registry: ghcr.io username: ${{ github.actor }} @@ -1210,36 +1234,34 @@ jobs: id: compute-tag run: | set -ex + base="ghcr.io/coqui-ai/stt-train" if [[ "${{ github.ref }}" = "refs/heads/main" ]]; then # Push to main - tags="main ${{ github.sha }}" + tags="${base}:main,${base}:${{ github.sha }}" elif [[ "${{ startsWith(github.ref, 'refs/tags/') }}" != "true" ]]; then # PR build - tags="dev" + tags="${base}:dev" else VERSION="v$(cat VERSION)" if [[ "${{ github.ref }}" != "refs/tags/${VERSION}" ]]; then echo "Pushed tag does not match VERSION file. Aborting push." exit 1 fi - tags="${VERSION} latest ${{ github.sha }}" + tags="${base}:${VERSION},${base}:latest,${base}:${{ github.sha }}" fi echo "::set-output name=tags::${tags}" - - name: Build - run: | - set -ex - declare -a tag_args=() - for tag in ${{ steps.compute-tag.outputs.tags }}; do - tag_args+=("-t" "ghcr.io/coqui-ai/stt-train:${tag}") - done - docker build -f Dockerfile.train . 
${tag_args[@]} - - name: Push - run: | - set -ex - for tag in ${{ steps.compute-tag.outputs.tags }}; do - docker push ghcr.io/coqui-ai/stt-train:${tag} + - name: Create and push manifest + run: | + set -ex # trace commands and stop at the first failure + base=${{ steps.compute-tag.outputs.tags }} + should_push=${{ github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/')) }} + for tag in $(echo $base | sed 's/,/ /g'); do + docker manifest create $tag \ + --amend ${tag}_amd64 + if [[ "$should_push" == "true" ]]; then + docker manifest push $tag + fi done - if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/')) twine-upload-decoder: name: "Upload coqui_stt_ctcdecoder packages to PyPI" runs-on: ubuntu-20.04 diff --git a/Dockerfile.build b/Dockerfile.build deleted file mode 100644 index 9a5a072cf..000000000 --- a/Dockerfile.build +++ /dev/null @@ -1,184 +0,0 @@ -# Please refer to the USING documentation, "Dockerfile for building from source" - -# Need devel version cause we need /usr/include/cudnn.h -FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 - -# >> START Install base software - -# Get basic packages -RUN apt-get update && apt-get install -y --no-install-recommends \ - apt-utils \ - bash-completion \ - build-essential \ - ca-certificates \ - cmake \ - curl \ - g++ \ - gcc \ - git \ - libbz2-dev \ - libboost-all-dev \ - libgsm1-dev \ - libltdl-dev \ - liblzma-dev \ - libmagic-dev \ - libpng-dev \ - libsox-fmt-mp3 \ - libsox-dev \ - locales \ - openjdk-8-jdk \ - pkg-config \ - python3 \ - python3-dev \ - python3-pip \ - python3-wheel \ - python3-numpy \ - sox \ - unzip \ - wget \ - zlib1g-dev - -RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 -RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 - -# Install Bazel -RUN curl -LO "https://github.com/bazelbuild/bazelisk/releases/download/v1.10.1/bazelisk-linux-amd64" && \ - mv
bazelisk-linux-amd64 /usr/bin/bazel && \ - chmod +x /usr/bin/bazel - -# Try and free some space -RUN rm -rf /var/lib/apt/lists/* - -# << END Install base software - -# >> START Configure Tensorflow Build - -# GPU Environment Setup -ENV TF_NEED_ROCM 0 -ENV TF_NEED_OPENCL_SYCL 0 -ENV TF_NEED_OPENCL 0 -ENV TF_NEED_CUDA 1 -ENV TF_CUDA_PATHS "/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/" -ENV TF_CUDA_VERSION 10.1 -ENV TF_CUDNN_VERSION 7.6 -ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0 -ENV TF_NCCL_VERSION 2.8 - -# Common Environment Setup -ENV TF_BUILD_CONTAINER_TYPE GPU -ENV TF_BUILD_OPTIONS OPT -ENV TF_BUILD_DISABLE_GCP 1 -ENV TF_BUILD_ENABLE_XLA 0 -ENV TF_BUILD_PYTHON_VERSION PYTHON3 -ENV TF_BUILD_IS_OPT OPT -ENV TF_BUILD_IS_PIP PIP - -# Other Parameters -ENV CC_OPT_FLAGS -mavx -mavx2 -msse4.1 -msse4.2 -mfma -ENV TF_NEED_GCP 0 -ENV TF_NEED_HDFS 0 -ENV TF_NEED_JEMALLOC 1 -ENV TF_NEED_OPENCL 0 -ENV TF_CUDA_CLANG 0 -ENV TF_NEED_MKL 0 -ENV TF_ENABLE_XLA 0 -ENV TF_NEED_AWS 0 -ENV TF_NEED_KAFKA 0 -ENV TF_NEED_NGRAPH 0 -ENV TF_DOWNLOAD_CLANG 0 -ENV TF_NEED_TENSORRT 0 -ENV TF_NEED_GDR 0 -ENV TF_NEED_VERBS 0 -ENV TF_NEED_OPENCL_SYCL 0 - -ENV PYTHON_BIN_PATH /usr/bin/python3.6 -ENV PYTHON_LIB_PATH /usr/local/lib/python3.6/dist-packages - -# << END Configure Tensorflow Build - -# >> START Configure Bazel - -# Running bazel inside a `docker build` command causes trouble, cf: -# https://github.com/bazelbuild/bazel/issues/134 -# The easiest solution is to set up a bazelrc file forcing --batch. -RUN echo "startup --batch" >>/etc/bazel.bazelrc -# Similarly, we need to workaround sandboxing issues: -# https://github.com/bazelbuild/bazel/issues/418 -RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \ - >>/etc/bazel.bazelrc - -# << END Configure Bazel - -WORKDIR / -COPY . 
/STT/ - -# >> START Build and bind - -WORKDIR /STT/tensorflow - -# Fix for not found script https://github.com/tensorflow/tensorflow/issues/471 -RUN ./configure - -# Using CPU optimizations: -# -mtune=generic -march=x86-64 -msse -msse2 -msse3 -msse4.1 -msse4.2 -mavx. -# Adding --config=cuda flag to build using CUDA. - -# passing LD_LIBRARY_PATH is required cause Bazel doesn't pickup it from environment - -# Build STT - -RUN bazel build \ - --verbose_failures \ - --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \ - -c opt \ - --copt=-mtune=generic \ - --copt=-march=x86-64 \ - --copt=-msse \ - --copt=-msse2 \ - --copt=-msse3 \ - --copt=-msse4.1 \ - --copt=-msse4.2 \ - --copt=-mavx \ - --config=noaws \ - --config=nogcp \ - --config=nohdfs \ - --config=nonccl \ - //native_client:libstt.so - -# Copy built libs to /STT/native_client -RUN cp bazel-bin/native_client/libstt.so bazel-bin/native_client/libkenlm.so /STT/native_client/ - -# Build client.cc and install Python client and decoder bindings -ENV TFDIR /STT/tensorflow - -RUN nproc - -WORKDIR /STT/native_client -RUN make NUM_PROCESSES=$(nproc) stt - -WORKDIR /STT -RUN cd native_client/python && make NUM_PROCESSES=$(nproc) bindings -RUN pip3 install -U pip setuptools wheel -RUN pip3 install --upgrade native_client/python/dist/*.whl - -RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings -RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl - -# << END Build and bind - -# Allow Python printing utf-8 -ENV PYTHONIOENCODING UTF-8 - -# Build KenLM in /STT/native_client/kenlm folder -WORKDIR /STT/native_client -RUN rm -rf kenlm && \ - git clone https://github.com/kpu/kenlm && \ - cd kenlm && \ - git checkout 87e85e66c99ceff1fab2500a7c60c01da7315eec && \ - mkdir -p build && \ - cd build && \ - cmake .. 
&& \ - make -j $(nproc) - -# Done -WORKDIR /STT diff --git a/Dockerfile.train b/Dockerfile.train index f47dc8832..86b0c3daf 100644 --- a/Dockerfile.train +++ b/Dockerfile.train @@ -2,7 +2,26 @@ # You can train "acoustic models" with audio + Tensorflow, and # you can create "scorers" with text + KenLM. -FROM nvcr.io/nvidia/tensorflow:22.02-tf1-py3 AS kenlm-build +FROM ghcr.io/reuben/manylinux_2_24:2022-03-31-361e6b6 as py38-venv +ENV DEBIAN_FRONTEND=noninteractive + +ENV VIRTUAL_ENV=/venv +ENV PATH="$VIRTUAL_ENV/bin:$PATH" +RUN /opt/python/cp38*/bin/python -m venv /venv && \ + pip install --upgrade pip wheel setuptools + +FROM py38-venv AS gen-scorer-package-build + +COPY native_client /code/native_client +COPY tensorflow /code/tensorflow +COPY ci_scripts /code/ci_scripts +WORKDIR /code +RUN ./ci_scripts/tf-setup.sh && \ + ./ci_scripts/gen-scorer-package-build.sh && \ + cp tensorflow/bazel-bin/native_client/generate_scorer_package . && \ + cd tensorflow && /code/bin/bazel clean && rm -rf /code/bin /code/dls + +FROM ghcr.io/reuben/manylinux_2_24:2022-03-31-361e6b6 AS kenlm-build ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ @@ -14,73 +33,47 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* # Build KenLM to generate new scorers -WORKDIR /code COPY kenlm /code/kenlm -RUN cd /code/kenlm && \ - mkdir -p build && \ - cd build && \ +RUN mkdir -p /code/kenlm/build && \ + cd /code/kenlm/build && \ cmake .. 
&& \ - make -j $(nproc) || \ + ( make -j $(nproc) || \ ( echo "ERROR: Failed to build KenLM."; \ echo "ERROR: Make sure you update the kenlm submodule on host before building this Dockerfile."; \ echo "ERROR: $ cd STT; git submodule update --init kenlm"; \ - exit 1; ) - - -FROM nvcr.io/nvidia/tensorflow:22.02-tf1-py3 -ENV DEBIAN_FRONTEND=noninteractive + exit 1; ) ) && \ + cd /code && \ + cp -R /code/kenlm/build/bin /code/kenlm-bin && \ + rm -rf /code/kenlm/build -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - git \ - wget \ - libopus0 \ - libopusfile0 \ - libsndfile1 \ - sox \ - libsox-fmt-mp3 \ - python3-venv && \ - rm -rf /var/lib/apt/lists/* - -RUN python3 -m venv --system-site-packages /venv -ENV VIRTUAL_ENV=/venv -ENV PATH="$VIRTUAL_ENV/bin:$PATH" - -# Make sure pip and its dependencies are up-to-date -RUN pip install --upgrade pip wheel setuptools - -WORKDIR /code +FROM py38-venv as decoder-build COPY native_client /code/native_client -COPY .git /code/.git COPY training/coqui_stt_training/VERSION /code/training/coqui_stt_training/VERSION +COPY VERSION /code/VERSION COPY training/coqui_stt_training/GRAPH_VERSION /code/training/coqui_stt_training/GRAPH_VERSION +COPY GRAPH_VERSION /code/GRAPH_VERSION +COPY native_client/ctcdecode/workspace_status.cc /code/native_client/ctcdecode/workspace_status.cc # Build CTC decoder first, to avoid clashes on incompatible versions upgrades -RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings -RUN pip install --upgrade native_client/ctcdecode/dist/*.whl - -COPY setup.py /code/setup.py -COPY VERSION /code/VERSION -COPY training /code/training -# Copy files from previous build stages -RUN mkdir -p /code/kenlm/build/ -COPY --from=kenlm-build /code/kenlm/build/bin /code/kenlm/build/bin +WORKDIR /code +RUN make -C native_client/ctcdecode bindings NUM_PROCESSES=$(nproc) && \ + cp native_client/ctcdecode/dist/*.whl . 
&& \ + make -C native_client/ctcdecode clean -# Tool to convert output graph for inference -RUN curl -L https://github.com/coqui-ai/STT/releases/download/v0.9.3/convert_graphdef_memmapped_format.linux.amd64.zip | funzip > convert_graphdef_memmapped_format && \ - chmod +x convert_graphdef_memmapped_format +FROM nvcr.io/nvidia/tensorflow:22.02-tf1-py3 +ENV DEBIAN_FRONTEND=noninteractive -# Pre-built native client tools -RUN LATEST_STABLE_RELEASE=$(curl "https://api.github.com/repos/coqui-ai/STT/releases/latest" | python -c 'import sys; import json; print(json.load(sys.stdin)["tag_name"])') \ - bash -c 'curl -L https://github.com/coqui-ai/STT/releases/download/${LATEST_STABLE_RELEASE}/native_client.tflite.Linux.tar.xz | tar -xJvf -' +COPY . /code +COPY --from=decoder-build /code/*.whl /code +COPY --from=kenlm-build /code/kenlm-bin /code/kenlm/build/bin +COPY --from=gen-scorer-package-build /code/generate_scorer_package /code # Install STT -# No need for the decoder since we did it earlier +# Use decoder wheel from previous stage # TensorFlow GPU should already be installed on the base image, # and we don't want to break that -RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip install --upgrade -e . - -# Copy rest of the code and test training -COPY . /code -RUN ./bin/run-ldc93s1.sh && rm -rf ~/.local/share/stt +WORKDIR /code +RUN pip install --upgrade coqui_stt_ctcdecoder-*.whl && \ + DS_NODECODER=y DS_NOTENSORFLOW=y pip install --upgrade -e . 
&& \ + ./bin/run-ldc93s1.sh && rm -rf ~/.local/share/stt diff --git a/Dockerfile.train.dockerignore b/Dockerfile.train.dockerignore index 1c96e61b9..114d0aaf6 100644 --- a/Dockerfile.train.dockerignore +++ b/Dockerfile.train.dockerignore @@ -1,10 +1,8 @@ -.git/lfs -tensorflow -.git/modules/tensorflow +.git +Dockerfile.train native_client/ds-swig native_client/libstt.so native_client/stt native_client/ctcdecode/dist/ native_client/ctcdecode/temp_build native_client/ctcdecode/third_party.a -native_client/ctcdecode/workspace_status.cc diff --git a/Makefile b/Makefile deleted file mode 100644 index 6953c4372..000000000 --- a/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -STT_REPO ?= https://github.com/coqui-ai/STT.git -STT_SHA ?= origin/main - -Dockerfile%: Dockerfile%.tmpl - sed \ - -e "s|#STT_REPO#|$(STT_REPO)|g" \ - -e "s|#STT_SHA#|$(STT_SHA)|g" \ - < $< > $@ diff --git a/ci_scripts/build-utils.sh b/ci_scripts/build-utils.sh index 11f63c65f..cffa0c398 100755 --- a/ci_scripts/build-utils.sh +++ b/ci_scripts/build-utils.sh @@ -8,9 +8,11 @@ do_bazel_build() cd ${DS_TFDIR} + # Callers may set WORKSPACE_STATUS to replace the status flag (a blank value drops it); the quoted default in the bazel call below stays a single argument. + bazel build ${BAZEL_CACHE} \ -s --explain bazel_explain.log --verbose_explanations \ - --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \ + ${WORKSPACE_STATUS:-"--workspace_status_command=bash native_client/bazel_workspace_status_cmd.sh"} \ -c ${_opt_or_dbg} ${BAZEL_BUILD_FLAGS} ${BAZEL_TARGETS} ls -lh bazel-bin/native_client diff --git a/ci_scripts/gen-scorer-package-build.sh b/ci_scripts/gen-scorer-package-build.sh new file mode 100755 index 000000000..dee6bfedd --- /dev/null +++ b/ci_scripts/gen-scorer-package-build.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +set -xe + +macos_target_arch=$1 +SYSTEM_TARGET=host +if [ "$(uname)-$(uname -m)" = "Darwin-x86_64" -a "${macos_target_arch}" = "arm64" ]; then + SYSTEM_TARGET="darwin-arm64" +fi + +source $(dirname "$0")/all-vars.sh +source $(dirname "$0")/all-utils.sh +source $(dirname
"$0")/build-utils.sh + +source $(dirname "$0")/tf-vars.sh + +BAZEL_TARGETS="//native_client:generate_scorer_package" +WORKSPACE_STATUS=" " + +BAZEL_BUILD_FLAGS="${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS}" + +do_bazel_build diff --git a/ci_scripts/tf-vars.sh b/ci_scripts/tf-vars.sh index 3ac7e7052..509570b0a 100755 --- a/ci_scripts/tf-vars.sh +++ b/ci_scripts/tf-vars.sh @@ -13,8 +13,13 @@ export OS=$(uname) if [ "${OS}" = "Linux" ]; then export DS_ROOT_TASK=${CI_TASK_DIR} - BAZEL_URL=https://github.com/bazelbuild/bazelisk/releases/download/v1.10.1/bazelisk-linux-amd64 - BAZEL_SHA256=4cb534c52cdd47a6223d4596d530e7c9c785438ab3b0a49ff347e991c210b2cd + if [[ "$(uname -m)" == "aarch64" ]]; then + BAZEL_URL=https://github.com/bazelbuild/bazelisk/releases/download/v1.11.0/bazelisk-linux-arm64 + BAZEL_SHA256=f9119deb1eeb6d730ee8b2e1a14d09cb45638f0447df23144229c5b3b3bc2408 + else + BAZEL_URL=https://github.com/bazelbuild/bazelisk/releases/download/v1.11.0/bazelisk-linux-amd64 + BAZEL_SHA256=231ec5ca8115e94c75a1f4fbada1a062b48822ca04f21f26e4cb1cd8973cd458 + fi ANDROID_NDK_URL=https://dl.google.com/android/repository/android-ndk-r19c-linux-x86_64.zip ANDROID_NDK_SHA256=4c62514ec9c2309315fd84da6d52465651cdb68605058f231f1e480fcf2692e1 @@ -62,8 +67,13 @@ elif [ "${OS}" = "Darwin" ]; then export DS_ROOT_TASK=${CI_TASK_DIR} - BAZEL_URL=https://github.com/bazelbuild/bazelisk/releases/download/v1.10.1/bazelisk-darwin-amd64 - BAZEL_SHA256=e485bbf84532d02a60b0eb23c702610b5408df3a199087a4f2b5e0995bbf2d5a + if [[ "$(uname -m)" == "arm64" ]]; then + BAZEL_URL=https://github.com/bazelbuild/bazelisk/releases/download/v1.11.0/bazelisk-darwin-arm64 + BAZEL_SHA256=1e18c98312d1a03525f704214304be2445478392c8687888d5d37e6a680f31e6 + else + BAZEL_URL=https://github.com/bazelbuild/bazelisk/releases/download/v1.11.0/bazelisk-darwin-amd64 + BAZEL_SHA256=c725fd574ea723ab25187d63ca31a5c9176d40433af92cd2449d718ee97e76a2 + fi SHA_SUM="shasum -a 256 -c" TAR=gtar @@ -140,7 +150,11 @@ else # # Build for 
generic amd64 platforms, no device-specific optimization # See https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html for targetting specific CPUs - BAZEL_OPT_FLAGS="--copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx" + if [[ "$(uname -m)" == "aarch64" ]]; then + BAZEL_OPT_FLAGS="" + else + BAZEL_OPT_FLAGS="--copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx" + fi fi if [ "$CI" != "true" ]; then diff --git a/native_client/ctcdecode/Makefile b/native_client/ctcdecode/Makefile index d8899ea74..5a539535d 100644 --- a/native_client/ctcdecode/Makefile +++ b/native_client/ctcdecode/Makefile @@ -1,4 +1,4 @@ -.PHONY: bindings clean workspace_status.cc +.PHONY: bindings clean include ../definitions.mk diff --git a/native_client/definitions.mk b/native_client/definitions.mk index aa1d19b09..4cd169dbc 100644 --- a/native_client/definitions.mk +++ b/native_client/definitions.mk @@ -14,6 +14,7 @@ TOOL_LDD := ldd TOOL_LIBEXE := OS := $(shell uname -s) +ARCH := $(shell uname -m) ifeq ($(findstring _NT,$(OS)),_NT) PLATFORM_EXE_SUFFIX := .exe @@ -46,7 +47,7 @@ SOX_LDFLAGS := $(LIBSOX_STATIC_DEPS) $(LIBOPUSFILE_STATIC_DEPS) -fra else SOX_LDFLAGS := `pkg-config --libs sox` endif # OS others -PYTHON_PACKAGES := numpy${NUMPY_BUILD_VERSION} +PYTHON_PACKAGES := oldest-supported-numpy ifeq ($(OS),Linux) PYTHON_PLATFORM_NAME ?= --plat-name manylinux_2_24_x86_64 endif @@ -63,7 +64,7 @@ LINK_PATH_STT := CFLAGS_STT := -nologo -Fe$(STT_BIN) SOX_CFLAGS := SOX_LDFLAGS := -PYTHON_PACKAGES := numpy${NUMPY_BUILD_VERSION} +PYTHON_PACKAGES := oldest-supported-numpy endif ifeq ($(TARGET),rpi3) @@ -218,11 +219,19 @@ endef SWIG_DIST_URL ?= ifeq ($(SWIG_DIST_URL),) ifeq ($(findstring Linux,$(OS)),Linux) -SWIG_DIST_URL := "https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/ds-swig.linux.amd64.tar.gz" +ifeq ($(ARCH),aarch64) +SWIG_DIST_URL := 
"https://github.com/coqui-ai/STT/releases/download/v1.3.0/ds-swig.linux.arm64.tar.gz" +else +SWIG_DIST_URL := "https://github.com/coqui-ai/STT/releases/download/v1.3.0/ds-swig.linux.amd64.tar.gz" +endif # aarch64 else ifeq ($(findstring Darwin,$(OS)),Darwin) -SWIG_DIST_URL := "https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/ds-swig.darwin.amd64.tar.gz" +ifeq ($(ARCH),arm64) +SWIG_DIST_URL := "https://github.com/coqui-ai/STT/releases/download/v1.3.0/ds-swig.darwin.arm64.tar.gz" +else +SWIG_DIST_URL := "https://github.com/coqui-ai/STT/releases/download/v1.3.0/ds-swig.darwin.amd64.tar.gz" +endif # arm64 else ifeq ($(findstring _NT,$(OS)),_NT) -SWIG_DIST_URL := "https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/ds-swig.win.amd64.tar.gz" +SWIG_DIST_URL := "https://github.com/coqui-ai/STT/releases/download/v1.3.0/ds-swig.win.amd64.tar.gz" else $(error There is no prebuilt SWIG available for your platform. Please produce one and set SWIG_DIST_URL.) endif # findstring() diff --git a/setup.py b/setup.py index 308768f67..3076721d4 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ def main(): "coqpit", "numpy", "optuna", - "numba <= 0.53.1", + "numba == 0.54.1", "opuslib == 2.0.0", "pandas", "progressbar2",