# Install prerequisites, fetch jetson-containers, and download the Llama-2 model.
# -y answers the install prompt so the step works non-interactively.
sudo apt-get update && sudo apt-get install -y git python3-pip
git clone --depth=1 https://github.com/dusty-nv/jetson-containers
# FIX: the original line ran `cd jetson-containers pip3 install -r requirements.txt`,
# which passes "pip3 install ..." as extra arguments to `cd` — the Python
# requirements were never installed. Chain the two commands with `&&`.
cd jetson-containers && pip3 install -r requirements.txt
# Clone the demo scripts into the container's mounted data directory.
cd ./data && git clone https://github.com/LJ-Hao/MLC-LLM-on-Jetson.git && cd ..
# Download the gated Llama-2-7b-chat-hf weights inside the MLC container and
# symlink them where mlc_llm.build expects them. Requires a Hugging Face token
# with access to the meta-llama repo.
./run.sh --env HUGGINGFACE_TOKEN=<YOUR-ACCESS-TOKEN> $(./autotag mlc) /bin/bash -c 'ln -s $(huggingface-downloader meta-llama/Llama-2-7b-chat-hf) /data/models/mlc/dist/models/Llama-2-7b-chat-hf'
Run `sudo docker images`
to check whether the image was installed correctly.
# Compile and quantize the model with MLC inside the container:
#   --quantization q4f16_ft : 4-bit weights / fp16 activations (FasterTransformer layout)
#   --artifact-path         : output directory for the compiled model library
#   --max-seq-len 4096      : maximum context length baked into the artifact
#   --target cuda           : build for the Jetson's CUDA GPU
#   --use-cuda-graph / --use-flash-attn-mqa : inference-speed optimizations
# NOTE(review): flag semantics taken from mlc_llm.build conventions — confirm
# against the mlc_llm version shipped in the container.
./run.sh $(./autotag mlc) \
python3 -m mlc_llm.build \
--model Llama-2-7b-chat-hf \
--quantization q4f16_ft \
--artifact-path /data/models/mlc/dist \
--max-seq-len 4096 \
--target cuda \
--use-cuda-graph \
--use-flash-attn-mqa
# Start the MLC container image; substitute the tag reported earlier by
# `sudo docker images`.
./run.sh <YOUR IMAGE NAME>
# For example: dustynv/mlc:51fb0f4-builder-r35.4.1 — check the output of the first step.
# Inside the container: run the demo with the original (un-quantized) weights.
cd /data/MLC-LLM-on-Jetson && python3 Llama-2-7b-chat-hf.py
Here is the result: https://github.com/dusty-nv/jetson-containers — you can see that without quantizing with MLC, the Jetson Nano 16GB can load the model but cannot run it.
cd /data/MLC-LLM-on-Jetson && python3 Llama-2-7b-chat-hf-q4f16_ft.py
Here is the result: you can see that after quantizing with MLC, the Jetson Nano 16GB can run the model.