diff --git a/sharktank/sharktank/examples/export_paged_llm_v1.py b/sharktank/sharktank/examples/export_paged_llm_v1.py index 7f35387ca..3ef0b9bac 100644 --- a/sharktank/sharktank/examples/export_paged_llm_v1.py +++ b/sharktank/sharktank/examples/export_paged_llm_v1.py @@ -96,9 +96,9 @@ def generate_params_json(hp, prefill_bs: list[int], decode_bs: list[int]): "prefill_batch_sizes": prefill_bs, "decode_batch_sizes": decode_bs, "transformer_block_count": hp.block_count, + "block_seq_stride": llama_config.block_seq_stride, "paged_kv_cache": { "attention_head_count_kv": hp.attention_head_count_kv, - "block_seq_stride": llama_config.block_seq_stride, "device_block_count": 256, # so that this makes its way into the config file & can be edited. }, }