Skip to content

use flash_attn_with_kvcache for faster inference #1320

use flash_attn_with_kvcache for faster inference

use flash_attn_with_kvcache for faster inference #1320

Workflow file for this run

# Workflow identity and triggers: runs on push and on pull_request events.
name: Lint & Tests
on:
  - push
  - pull_request
jobs:
  lint-and-tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Build only for 3.8 for now. The version is quoted so YAML keeps it
        # a string; an unquoted 3.10 would be parsed as the float 3.1.
        python-version: ["3.8"]
    steps:
# Check out the repository, then install the interpreter chosen by the matrix.
# Both actions are pinned to current major versions; the v2 releases are
# deprecated.
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
  uses: actions/setup-python@v5
  with:
    python-version: ${{ matrix.python-version }}
# Install the package in editable mode plus the tools the later steps call.
- name: Install dependencies
  run: |
    python -m pip install --upgrade pip
    pip install --upgrade setuptools
    pip install -e .
    pip install -r requirements.opt.txt
    pip install sacrebleu
    # Pinned formatter/linter. The specifiers are quoted so the shell can
    # never glob-expand the '*'. The former unpinned "pip install flake8"
    # was dropped because this pin replaced whatever it installed.
    python -m pip install "black==22.*" "flake8==3.8.*"
    if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
# Runs Black in check mode over the repository root.
- name: Check code with Black
  run: black --check .
# Runs flake8 over the repository root.
- name: Lint with flake8
  run: flake8 .
# Runs unittest discovery from the repository root.
- name: Unit tests
  run: python -m unittest discover
# Builds /tmp/onmt.vocab.src and /tmp/onmt.vocab.tgt from a 5000-example
# sample; on success, removes /tmp/sample.
- name: Test vocabulary build
  run: |
    python onmt/bin/build_vocab.py \
      -config data/data.yaml \
      -save_data /tmp/onmt \
      -n_sample 5000 \
      -src_vocab /tmp/onmt.vocab.src -tgt_vocab /tmp/onmt.vocab.tgt \
      && rm -rf /tmp/sample
# Builds the /tmp/onmt_feat.vocab.* files from data/features_data.yaml with
# -n_sample -1; on success, removes /tmp/sample.
- name: Test vocabulary build with features
  run: |
    python onmt/bin/build_vocab.py \
      -config data/features_data.yaml \
      -save_data /tmp/onmt_feat \
      -src_vocab /tmp/onmt_feat.vocab.src -tgt_vocab /tmp/onmt_feat.vocab.tgt \
      -n_sample -1 \
      && rm -rf /tmp/sample
# Runs train.py with -dump_fields and -dump_transforms on a 30-example sample.
- name: Test field/transform dump
  run: |
    # The dumped fields are used later when testing tools
    python train.py \
      -config data/data.yaml \
      -save_data /tmp/onmt.train.check \
      -dump_fields -dump_transforms \
      -n_sample 30 \
      -num_workers 0 \
      -bucket_size 1024 \
      -src_vocab /tmp/onmt.vocab.src -tgt_vocab /tmp/onmt.vocab.tgt \
      -src_vocab_size 1000 -tgt_vocab_size 1000
# Trains for 10 steps with tensorboard logging enabled, then runs
# test_events.py against the log directory with the "train" checks.
# -hidden_size used to be passed twice (2, then 10); the later value is the
# one argparse keeps, so only 10 is passed now.
- name: Test RNN training
  run: |
    python train.py \
      -config data/data.yaml \
      -src_vocab /tmp/onmt.vocab.src \
      -tgt_vocab /tmp/onmt.vocab.tgt \
      -src_vocab_size 1000 \
      -tgt_vocab_size 1000 \
      -num_workers 0 -bucket_size 1024 \
      -batch_size 10 \
      -word_vec_size 5 \
      -report_every 5 \
      -hidden_size 10 \
      -train_steps 10 \
      -tensorboard "true" \
      -tensorboard_log_dir /tmp/logs_train
    python onmt/tests/test_events.py --logdir /tmp/logs_train -tensorboard_checks train
# Trains for 10 steps with validation every 5 steps and -copy_attn, then runs
# test_events.py with the "train" and "valid" checks on the tensorboard logs.
# The shadowed first "-hidden_size 2" was removed; the later value (10) is the
# one argparse keeps.
- name: Test RNN training and validation with copy
  run: |
    python train.py \
      -config data/data.yaml \
      -src_vocab /tmp/onmt.vocab.src \
      -tgt_vocab /tmp/onmt.vocab.tgt \
      -src_vocab_size 1000 \
      -tgt_vocab_size 1000 \
      -num_workers 0 -bucket_size 1024 \
      -batch_size 10 \
      -word_vec_size 5 \
      -report_every 5 \
      -hidden_size 10 \
      -train_steps 10 -valid_steps 5 \
      -tensorboard "true" \
      -tensorboard_log_dir /tmp/logs_train_and_valid \
      -copy_attn
    python onmt/tests/test_events.py --logdir /tmp/logs_train_and_valid -tensorboard_checks train
    python onmt/tests/test_events.py --logdir /tmp/logs_train_and_valid -tensorboard_checks valid
# Trains for 10 steps with coverage attention (-lambda_coverage 0.1).
# The shadowed first "-hidden_size 2" was removed; the later value (10) is the
# one argparse keeps.
- name: Test RNN training with coverage
  run: |
    python train.py \
      -config data/data.yaml \
      -src_vocab /tmp/onmt.vocab.src \
      -tgt_vocab /tmp/onmt.vocab.tgt \
      -src_vocab_size 1000 \
      -tgt_vocab_size 1000 \
      -batch_size 10 \
      -num_workers 0 -bucket_size 1024 \
      -word_vec_size 5 -report_every 5 \
      -coverage_attn true -lambda_coverage 0.1 \
      -hidden_size 10 -train_steps 10
# Trains a 4-layer transformer for 10 steps on data/align_data.yaml with the
# alignment options and a stepped dropout schedule.
- name: Test Transformer training with align
  run: |
    python train.py \
      -config data/align_data.yaml \
      -src_vocab /tmp/onmt.vocab.src -tgt_vocab /tmp/onmt.vocab.tgt \
      -src_vocab_size 1000 -tgt_vocab_size 1000 \
      -encoder_type transformer -decoder_type transformer \
      -layers 4 \
      -word_vec_size 16 -hidden_size 16 \
      -num_workers 0 \
      -bucket_size 1024 \
      -heads 2 \
      -transformer_ff 64 \
      -lambda_align 0.05 -alignment_layer 2 -alignment_heads 0 \
      -dropout_steps 0 3 7 \
      -dropout 0.3 0.2 0.1 \
      -attention_dropout 0.2 0.1 0.1 \
      -report_every 5 \
      -train_steps 10
# Transformer training with BLEU/TER validation metrics, position encoding and
# copy attention; afterwards test_events.py runs the "valid_metrics" checks on
# the tensorboard logs.
# The repeated "-num_workers 0 -bucket_size 1024" pair is now passed once.
- name: Test Transformer training and validation with dynamic scoring and copy
  run: |
    python3 train.py \
      -config data/data.yaml \
      -src_vocab /tmp/onmt.vocab.src \
      -tgt_vocab /tmp/onmt.vocab.tgt \
      -src_vocab_size 1000 \
      -tgt_vocab_size 1000 \
      -encoder_type transformer \
      -decoder_type transformer \
      -layers 4 \
      -word_vec_size 16 \
      -hidden_size 16 \
      -num_workers 0 -bucket_size 1024 \
      -heads 2 \
      -transformer_ff 64 \
      -accum_count 2 4 8 \
      -accum_steps 0 15000 30000 \
      -save_model /tmp/onmt.model \
      -train_steps 10 -valid_steps 5 \
      -report_every 2 \
      -valid_metrics "BLEU" "TER" \
      -tensorboard "true" \
      -scoring_debug "true" \
      -tensorboard_log_dir /tmp/logs_dynamic-scoring_and_copy \
      -dump_preds /tmp/dump_preds \
      -position_encoding \
      -copy_attn
    python onmt/tests/test_events.py --logdir /tmp/logs_dynamic-scoring_and_copy -tensorboard_checks valid_metrics
# Transformer training with BLEU/TER validation metrics and
# -max_relative_positions 8; afterwards test_events.py runs the
# "valid_metrics" checks on the tensorboard logs.
# The repeated "-num_workers 0 -bucket_size 1024" pair is now passed once.
- name: Test Transformer training and validation with dynamic scoring and maxrelative
  run: |
    python3 train.py \
      -config data/data.yaml \
      -src_vocab /tmp/onmt.vocab.src \
      -tgt_vocab /tmp/onmt.vocab.tgt \
      -src_vocab_size 1000 \
      -tgt_vocab_size 1000 \
      -encoder_type transformer \
      -decoder_type transformer \
      -layers 4 \
      -word_vec_size 16 \
      -hidden_size 16 \
      -num_workers 0 -bucket_size 1024 \
      -heads 2 \
      -transformer_ff 64 \
      -accum_count 2 4 8 \
      -accum_steps 0 15000 30000 \
      -save_model /tmp/onmt.model \
      -train_steps 10 -valid_steps 5 \
      -report_every 2 \
      -valid_metrics "BLEU" "TER" \
      -tensorboard "true" \
      -scoring_debug "true" \
      -tensorboard_log_dir /tmp/logs_dynamic-scoring_and_relative \
      -dump_preds /tmp/dump_preds \
      -max_relative_positions 8
    python onmt/tests/test_events.py --logdir /tmp/logs_dynamic-scoring_and_relative -tensorboard_checks valid_metrics
# Transformer training with BLEU/TER validation metrics and
# -max_relative_positions -1; afterwards test_events.py runs the
# "valid_metrics" checks on the tensorboard logs.
# NOTE(review): -1 presumably selects rotary embeddings, per the step name;
# confirm against the option help.
# The repeated "-num_workers 0 -bucket_size 1024" pair is now passed once.
- name: Test Transformer training and validation with dynamic scoring and rotary
  run: |
    python3 train.py \
      -config data/data.yaml \
      -src_vocab /tmp/onmt.vocab.src \
      -tgt_vocab /tmp/onmt.vocab.tgt \
      -src_vocab_size 1000 \
      -tgt_vocab_size 1000 \
      -encoder_type transformer \
      -decoder_type transformer \
      -layers 4 \
      -word_vec_size 16 \
      -hidden_size 16 \
      -num_workers 0 -bucket_size 1024 \
      -heads 2 \
      -transformer_ff 64 \
      -accum_count 2 4 8 \
      -accum_steps 0 15000 30000 \
      -save_model /tmp/onmt.model \
      -train_steps 10 -valid_steps 5 \
      -report_every 2 \
      -valid_metrics "BLEU" "TER" \
      -tensorboard "true" \
      -scoring_debug "true" \
      -tensorboard_log_dir /tmp/logs_dynamic-scoring_and_rotary \
      -dump_preds /tmp/dump_preds \
      -max_relative_positions -1
    python onmt/tests/test_events.py --logdir /tmp/logs_dynamic-scoring_and_rotary -tensorboard_checks valid_metrics
# Transformer training with BLEU/TER validation metrics and ALiBi position
# biases; afterwards test_events.py runs the "valid_metrics" checks on the
# tensorboard logs.
# This step previously passed "-max_relative_positions 8", the same value as
# the maxrelative step, so it did not exercise ALiBi at all.
# NOTE(review): -2 is the ALiBi selector per the OpenNMT-py option help;
# confirm against the installed version.
# The repeated "-num_workers 0 -bucket_size 1024" pair is now passed once.
- name: Test Transformer training and validation with dynamic scoring and alibi
  run: |
    python3 train.py \
      -config data/data.yaml \
      -src_vocab /tmp/onmt.vocab.src \
      -tgt_vocab /tmp/onmt.vocab.tgt \
      -src_vocab_size 1000 \
      -tgt_vocab_size 1000 \
      -encoder_type transformer \
      -decoder_type transformer \
      -layers 4 \
      -word_vec_size 16 \
      -hidden_size 16 \
      -num_workers 0 -bucket_size 1024 \
      -heads 2 \
      -transformer_ff 64 \
      -accum_count 2 4 8 \
      -accum_steps 0 15000 30000 \
      -save_model /tmp/onmt.model \
      -train_steps 10 -valid_steps 5 \
      -report_every 2 \
      -valid_metrics "BLEU" "TER" \
      -tensorboard "true" \
      -scoring_debug "true" \
      -tensorboard_log_dir /tmp/logs_dynamic-scoring_and_alibi \
      -dump_preds /tmp/dump_preds \
      -max_relative_positions -2
    python onmt/tests/test_events.py --logdir /tmp/logs_dynamic-scoring_and_alibi -tensorboard_checks valid_metrics
# Trains a 2-layer transformer_lm for 10 steps on data/lm_data.yaml.
# The source vocabulary file is passed for both -src_vocab and -tgt_vocab.
- name: Test LM training
  run: |
    python train.py \
      -config data/lm_data.yaml \
      -src_vocab /tmp/onmt.vocab.src -tgt_vocab /tmp/onmt.vocab.src \
      -model_task lm \
      -encoder_type transformer_lm -decoder_type transformer_lm \
      -src_vocab_size 1000 -tgt_vocab_size 1000 \
      -num_workers 0 \
      -bucket_size 1024 \
      -dec_layers 2 \
      -batch_size 10 \
      -heads 4 \
      -transformer_ff 64 \
      -word_vec_size 16 \
      -report_every 5 \
      -hidden_size 16 \
      -train_steps 10
# Same LM training run as the plain LM step, with -copy_attn added.
- name: Test LM training with copy
  run: |
    python train.py \
      -config data/lm_data.yaml \
      -src_vocab /tmp/onmt.vocab.src -tgt_vocab /tmp/onmt.vocab.src \
      -model_task lm \
      -encoder_type transformer_lm -decoder_type transformer_lm \
      -src_vocab_size 1000 -tgt_vocab_size 1000 \
      -num_workers 0 \
      -bucket_size 1024 \
      -dec_layers 2 \
      -batch_size 10 \
      -heads 4 \
      -transformer_ff 64 \
      -word_vec_size 16 \
      -report_every 5 \
      -hidden_size 16 \
      -train_steps 10 \
      -copy_attn
# Trains a ggnn encoder with an rnn decoder for 10 steps on
# data/ggnn_data.yaml.
- name: Test Graph neural network training
  run: |
    python train.py \
      -config data/ggnn_data.yaml \
      -src_seq_length 1000 -tgt_seq_length 30 \
      -encoder_type ggnn \
      -layers 2 \
      -decoder_type rnn \
      -hidden_size 256 \
      -learning_rate 0.1 -learning_rate_decay 0.8 \
      -global_attention general \
      -batch_size 32 \
      -word_vec_size 256 \
      -bridge \
      -num_workers 0 \
      -bucket_size 1024 \
      -train_steps 10 \
      -n_edge_types 9 \
      -state_dim 256 \
      -n_steps 10 \
      -n_node 64
# Trains on data/features_data.yaml for 10 steps and saves a checkpoint under
# the /tmp/onmt.model prefix at step 10.
# The shadowed first "-hidden_size 2" was removed; the later value (10) is the
# one argparse keeps.
- name: Testing training with features
  run: |
    python onmt/bin/train.py \
      -config data/features_data.yaml \
      -src_vocab /tmp/onmt_feat.vocab.src \
      -tgt_vocab /tmp/onmt_feat.vocab.tgt \
      -src_vocab_size 1000 -tgt_vocab_size 1000 \
      -batch_size 10 \
      -num_workers 0 -bucket_size 1024 \
      -word_vec_size 5 -hidden_size 10 \
      -report_every 5 -train_steps 10 \
      -save_model /tmp/onmt.model \
      -save_checkpoint_steps 10
# Features training with validation every 5 steps and BLEU/TER validation
# metrics; saves a checkpoint under the /tmp/onmt.model prefix at step 10.
# A space now precedes the continuation after "-valid_steps 5", so the next
# flag can never be glued onto the value. The shadowed first "-hidden_size 2"
# was removed; the later value (10) is the one argparse keeps.
- name: Testing training with features and dynamic scoring
  run: |
    python onmt/bin/train.py \
      -config data/features_data.yaml \
      -src_vocab /tmp/onmt_feat.vocab.src \
      -tgt_vocab /tmp/onmt_feat.vocab.tgt \
      -src_vocab_size 1000 -tgt_vocab_size 1000 \
      -batch_size 10 \
      -word_vec_size 5 -hidden_size 10 \
      -num_workers 0 -bucket_size 1024 \
      -report_every 5 -train_steps 10 -valid_steps 5 \
      -valid_metrics "BLEU" "TER" \
      -save_model /tmp/onmt.model \
      -save_checkpoint_steps 10
# Translates the features test file with /tmp/onmt.model_step_10.pt.
# NOTE(review): presumably the checkpoint written by the preceding features
# training steps; confirm.
- name: Testing translation with features
  run: |
    python translate.py \
      -model /tmp/onmt.model_step_10.pt \
      -src data/data_features/src-test-with-feats.txt \
      -n_src_feats 1 \
      -verbose
# Translates the first 10 lines of data/src-test.txt with the bundled test model.
- name: Test RNN translation
  run: |
    head data/src-test.txt > /tmp/src-test.txt
    python translate.py -model onmt/tests/test_model.pt \
      -src /tmp/src-test.txt \
      -verbose
# Same translation run, but -model receives the test model path twice.
- name: Test RNN ensemble translation
  run: |
    head data/src-test.txt > /tmp/src-test.txt
    python translate.py \
      -model onmt/tests/test_model.pt onmt/tests/test_model.pt \
      -src /tmp/src-test.txt \
      -verbose
# Beam-size-10 translation whose output is diffed against
# data/morph/tgt.valid; the output file is removed when the diff is clean.
- name: Test RNN translation with beam search
  run: |
    python translate.py \
      -model onmt/tests/test_model2.pt \
      -src data/morph/src.valid \
      -verbose \
      -batch_size 10 -beam_size 10 \
      -tgt data/morph/tgt.valid \
      -out /tmp/trans
    diff data/morph/tgt.valid /tmp/trans && rm /tmp/trans
# Seeded sampling at temperature 0.0001 whose output is diffed against
# data/morph/tgt.valid; the output file is removed when the diff is clean.
- name: Test RNN translation with random sampling
  run: |
    python translate.py \
      -model onmt/tests/test_model2.pt \
      -src data/morph/src.valid \
      -verbose \
      -batch_size 10 -beam_size 1 \
      -seed 1 \
      -random_sampling_topk "-1" -random_sampling_temp 0.0001 \
      -tgt data/morph/tgt.valid \
      -out /tmp/trans
    diff data/morph/tgt.valid /tmp/trans && rm /tmp/trans
# Runs the bundled LM test model on the first 10 lines of data/src-test.txt.
- name: Test LM generation
  run: |
    head data/src-test.txt > /tmp/src-test.txt
    python translate.py -model onmt/tests/test_model_lm.pt \
      -src /tmp/src-test.txt \
      -verbose
# Beam-size-10 LM generation diffed against data/data_lm/gen-beam-sol.txt;
# the output file is removed when the diff is clean.
- name: Test LM generation with beam search
  run: |
    python translate.py -model onmt/tests/test_model_lm.pt \
      -src data/data_lm/src-gen.txt \
      -verbose \
      -batch_size 1 \
      -beam_size 10 \
      -ban_unk_token \
      -length_penalty none \
      -out /tmp/gen
    diff data/data_lm/gen-beam-sol.txt /tmp/gen && rm /tmp/gen
# Seeded LM sampling at temperature 0.0001, diffed against
# data/data_lm/gen-sampling-sol.txt; the output file is removed when the diff
# is clean.
- name: Test LM generation with random sampling
  run: |
    python translate.py \
      -model onmt/tests/test_model_lm.pt \
      -src data/data_lm/src-gen.txt \
      -verbose \
      -batch_size 1 -beam_size 1 \
      -seed 1 \
      -random_sampling_topk -1 -random_sampling_temp 0.0001 \
      -ban_unk_token \
      -length_penalty none \
      -out /tmp/gen
    diff data/data_lm/gen-sampling-sol.txt /tmp/gen && rm /tmp/gen
# Seeded top-p (0.95) LM sampling. The reference file name is suffixed with
# the first character of torch.__version__.
- name: Test LM generation with random top-k/nucleus sampling
  run: |
    python translate.py \
      -model onmt/tests/test_model_lm.pt \
      -src data/data_lm/src-gen.txt \
      -verbose \
      -batch_size 1 -beam_size 1 \
      -seed 3 \
      -random_sampling_topk -1 -random_sampling_topp 0.95 \
      -random_sampling_temp 1 \
      -ban_unk_token \
      -length_penalty none \
      -out /tmp/gen
    diff data/data_lm/gen-nucleus-sampling-sol$(python -c "import torch; print(torch.__version__[0])").txt /tmp/gen && rm /tmp/gen
# Seeded top-k 50 / top-p 0.95 sampling with beam size 10. The reference file
# name is suffixed with the first character of torch.__version__.
- name: Test LM generation with random sampling multi-beams
  run: |
    python translate.py \
      -model onmt/tests/test_model_lm.pt \
      -src data/data_lm/src-gen.txt \
      -verbose \
      -batch_size 1 -beam_size 10 \
      -seed 2 \
      -random_sampling_topk 50 -random_sampling_topp 0.95 \
      -random_sampling_temp 1 \
      -length_penalty avg \
      -ban_unk_token \
      -min_length 5 \
      -out /tmp/gen
    diff data/data_lm/gen-sampling-beams-sol$(python -c "import torch; print(torch.__version__[0])").txt /tmp/gen && rm /tmp/gen
# Runs test_inference_engines.py in "py" mode on the LM test model.
- name: Test py-LM inference engine
  run: |
    head data/src-test.txt > /tmp/src-test.txt
    python onmt/tests/test_inference_engines.py \
      -model onmt/tests/test_model_lm.pt -model_task lm \
      -input_file /tmp/src-test.txt \
      -inference_config_file data/inference-engine_py.yaml \
      -inference_mode py \
      -out /tmp/inference_engine_lm_py_outputs
# Runs test_inference_engines.py in "ct2" mode on onmt/tests/test_model_lm_ct2.
# NOTE(review): this reuses data/inference-engine_py.yaml and the same -out
# path as the py-LM step; confirm that is intended.
- name: Test ct2-LM inference engine
  run: |
    head data/src-test.txt > /tmp/src-test.txt
    python onmt/tests/test_inference_engines.py \
      -model onmt/tests/test_model_lm_ct2 -model_task lm \
      -input_file /tmp/src-test.txt \
      -inference_config_file data/inference-engine_py.yaml \
      -inference_mode ct2 \
      -out /tmp/inference_engine_lm_py_outputs
# Runs test_inference_engines.py in "py" mode on the seq2seq test model.
# NOTE(review): the -out path is the same one the LM engine steps write to;
# confirm that is intended.
- name: Test py-SEQ2SEQ inference engine
  run: |
    head data/src-test.txt > /tmp/src-test.txt
    python onmt/tests/test_inference_engines.py \
      -model onmt/tests/test_model.pt -model_task seq2seq \
      -input_file /tmp/src-test.txt \
      -inference_config_file data/inference-engine_py.yaml \
      -inference_mode py \
      -out /tmp/inference_engine_lm_py_outputs
# Extracts the source-side vocabulary of the test model and checks that the
# output file has exactly 1002 lines.
- name: Test extract_vocabulary tool
  run: |
    python tools/extract_vocabulary.py \
      -model onmt/tests/test_model.pt \
      -side src \
      -out_file /tmp/onmt.vocab.txt
    # Compare the line count numerically. The former substring grep for
    # "1002" on the wc output would also accept counts such as 11002 or 10020.
    if [ "$(wc -l < /tmp/onmt.vocab.txt)" -ne 1002 ]
    then echo "wrong word count" && exit 1
    else
      echo "create vocabulary pass"
    fi
# Runs embeddings_to_torch.py with the sample GloVe file on both the encoder
# and decoder side; on success, removes the /tmp/q_gloveembeddings* outputs.
- name: Test embeddings_to_torch tool
  run: |
    python tools/embeddings_to_torch.py \
      -emb_file_enc onmt/tests/sample_glove.txt -emb_file_dec onmt/tests/sample_glove.txt \
      -dict_file onmt/tests/test_model.pt \
      -output_file /tmp/q_gloveembeddings \
      && rm /tmp/q_gloveembeddings*
# Runs extract_embeddings.py on the bundled test model.
- name: Test extract_embeddings tool
  run: python tools/extract_embeddings.py -model onmt/tests/test_model.pt
# Trains 10 steps and saves a checkpoint, prepends a new token to the source
# vocabulary file, then resumes from the checkpoint with -update_vocab up to
# step 20.
# A space now precedes the continuation after "-report_every 5" in both
# commands. The shadowed first "-hidden_size 2" was removed from both; the
# later value (10) is the one argparse keeps.
- name: Test checkpoint vocabulary update
  run: |
    python train.py \
      -config data/data.yaml \
      -src_vocab /tmp/onmt.vocab.src \
      -tgt_vocab /tmp/onmt.vocab.tgt \
      -src_vocab_size 1000 \
      -tgt_vocab_size 1000 \
      -batch_size 10 \
      -word_vec_size 5 \
      -report_every 5 \
      -hidden_size 10 \
      -num_workers 0 -bucket_size 1024 \
      -train_steps 10 \
      -save_model /tmp/onmt.model \
      -save_checkpoint_steps 10
    # Insert "new_tok<TAB>100000000" as the first line of the source vocab.
    sed -i '1s/^/new_tok\t100000000\n/' /tmp/onmt.vocab.src
    python train.py \
      -config data/data.yaml \
      -src_vocab /tmp/onmt.vocab.src \
      -tgt_vocab /tmp/onmt.vocab.tgt \
      -src_vocab_size 1000 \
      -tgt_vocab_size 1000 \
      -batch_size 10 \
      -word_vec_size 5 \
      -report_every 5 \
      -hidden_size 10 \
      -train_steps 20 \
      -num_workers 0 -bucket_size 1024 \
      -update_vocab \
      -reset_optim "states" \
      -train_from /tmp/onmt.model_step_10.pt
# LM variant of the vocabulary-update test: train 10 steps and checkpoint to
# /tmp/lm.onmt.model, prepend a new token to the vocabulary file, then resume
# with -update_vocab up to step 20.
- name: Test checkpoint vocabulary update with LM
  run: |
    python train.py \
      -config data/lm_data.yaml \
      -src_vocab /tmp/onmt.vocab.src -tgt_vocab /tmp/onmt.vocab.src \
      -model_task lm \
      -encoder_type transformer_lm -decoder_type transformer_lm \
      -src_vocab_size 1000 -tgt_vocab_size 1000 \
      -dec_layers 2 \
      -batch_size 10 \
      -heads 4 \
      -transformer_ff 64 \
      -num_workers 0 \
      -bucket_size 1024 \
      -word_vec_size 16 \
      -report_every 5 \
      -save_model /tmp/lm.onmt.model \
      -save_checkpoint_steps 10 \
      -hidden_size 16 \
      -train_steps 10
    # Insert "new_tok2<TAB>100000000" as the first line of the vocab file.
    sed -i '1s/^/new_tok2\t100000000\n/' /tmp/onmt.vocab.src
    python train.py \
      -config data/lm_data.yaml \
      -src_vocab /tmp/onmt.vocab.src -tgt_vocab /tmp/onmt.vocab.src \
      -model_task lm \
      -encoder_type transformer_lm -decoder_type transformer_lm \
      -src_vocab_size 1000 -tgt_vocab_size 1000 \
      -num_workers 0 \
      -bucket_size 1024 \
      -dec_layers 2 \
      -batch_size 10 \
      -heads 4 \
      -transformer_ff 64 \
      -word_vec_size 16 \
      -report_every 5 \
      -hidden_size 16 \
      -train_steps 20 \
      -update_vocab \
      -reset_optim "states" \
      -train_from /tmp/lm.onmt.model_step_10.pt
# Job that installs the package plus docs/requirements.txt and builds the
# HTML documentation.
build-docs:
  runs-on: ubuntu-latest
  steps:
    # Current major versions; the v2 releases of both actions are deprecated.
    - uses: actions/checkout@v4
    - name: Set up Python 3.8
      uses: actions/setup-python@v5
      with:
        # Quoted so YAML keeps the version a string rather than a float.
        python-version: "3.8"
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install --upgrade setuptools
        pip install -e .
        pip install -r docs/requirements.txt
    - name: Build docs
      run: |
        set -e
        # Check that docs are built without errors
        cd docs/ && make html && cd ..