Skip to content

Commit

Permalink
update llama triton example (#1153)
Browse files Browse the repository at this point in the history
* update llama triton example

* update start_ids.csv

* fix lint
  • Loading branch information
zhyncs authored Feb 21, 2024
1 parent 24beeb6 commit 685070f
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 5 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ work_dir*/
*.log
*.out
*.csv
!start_ids.csv
*.pkl

!CMakeLists.txt
Expand Down
7 changes: 7 additions & 0 deletions examples/cpp/llama/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# How to generate start_ids.csv

```bash
# adjust the `model_file` path and the `encode_line` text to match your setup
python3 tokenizer.py --model_file /workdir/llama2_13b_chat/tokenizer.model --encode_line 'LMDeploy is a toolkit for compressing, deploying, and serving LLMs.'
# refer to tokenizer.py for more usage scenarios
```
1 change: 1 addition & 0 deletions examples/cpp/llama/llama_config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ data_type=fp16
enable_custom_all_reduce=0
pipeline_para_size=1
tensor_para_size=1
; set model_dir to the actual path of your converted model weights
model_dir=/workspace/models/triton_models/weights/


Expand Down
11 changes: 7 additions & 4 deletions examples/cpp/llama/llama_triton_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ int read_start_ids(size_t batch_size,
std::string file_name);

std::vector<std::shared_ptr<std::unordered_map<std::string, triton::Tensor>>>
prepareRequest(std::string ini_name, const int node_id, const int gpu_count, std::vector<void*>* pointer_record)
prepareRequest(std::string ini_name, const int node_id, const int gpu_count, std::vector<void*>* pointer_record, const std::string& csv_name)
{
INIReader reader = INIReader(ini_name);
if (reader.ParseError() < 0) {
Expand All @@ -279,7 +279,7 @@ prepareRequest(std::string ini_name, const int node_id, const int gpu_count, std
max_input_len,
end_id,
1,
"../examples/cpp/llama/start_ids.csv");
csv_name);
// drop requests > request_batch_size
if (v_start_lengths.size() > request_batch_size) {
v_start_lengths.resize(request_batch_size);
Expand Down Expand Up @@ -363,6 +363,7 @@ int main(int argc, char* argv[])
// Note: Only supports that all nodes have same gpu count
const int gpu_count = ft::getDeviceCount();
const int world_size = node_num * gpu_count;
printf("Recommend to specify the first parameter on the command line as the path to llama_config.ini\n");
std::string ini_name = argc >= 2 ? std::string(argv[1]) : "../examples/cpp/llama/llama_config.ini";

// step 1: Create model
Expand All @@ -372,7 +373,7 @@ int main(int argc, char* argv[])
printf(
"world_size=%d tensor_para_size=%d pipeline_para_size=%d\n", world_size, tensor_para_size, pipeline_para_size);
FT_CHECK_WITH_INFO(world_size == (tensor_para_size * pipeline_para_size),
"World Size != Tensor Parallel Size * Pipeline Parallel Size !");
"World Size != Tensor Parallel Size * Pipeline Parallel Size ! Maybe you can use CUDA_VISIBLE_DEVICES.");

std::cout << model->toString();

Expand Down Expand Up @@ -402,10 +403,12 @@ int main(int argc, char* argv[])
}

// step 4: prepare request
printf("Recommend to specify the second parameter on the command line as the path to start_ids.csv\n");
std::string csv_name = argc >= 3 ? std::string(argv[2]) : "../examples/cpp/llama/start_ids.csv";
std::vector<void*> pointer_record; // Used to prevent the pointers are
// release after leaving functions
std::vector<std::shared_ptr<std::unordered_map<std::string, triton::Tensor>>> request_list =
prepareRequest(ini_name, node_id, gpu_count, &pointer_record);
prepareRequest(ini_name, node_id, gpu_count, &pointer_record, csv_name);
printf("[INFO] request is created \n");

// step 5: Forward
Expand Down
1 change: 1 addition & 0 deletions examples/cpp/llama/start_ids.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1,365,5773,1022,2376,338,263,5780,7354,363,27122,292,29892,7246,292,29892,322,16330,365,26369,29879,29889
10 changes: 9 additions & 1 deletion examples/cpp/llama/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ def decode(self, t: List[int]):

def main(model_file: str = '/data/llama/model/tokenizer.model',
encode_file: str = None,
decode_file: str = None):
decode_file: str = None,
encode_line: str = None):
tokenizer = Tokenizer(model_file)
if encode_file:
with open(encode_file, 'r') as f:
Expand All @@ -59,6 +60,13 @@ def main(model_file: str = '/data/llama/model/tokenizer.model',
_token_ids = [int(token_id) for token_id in _token_ids]
ys = tokenizer.decode(_token_ids)
print(ys)
elif encode_line:
xs = tokenizer.encode(encode_line)
xs = ','.join(map(str, xs))
print(xs)
output_dir = osp.dirname(osp.abspath(__file__))
with open(osp.join(output_dir, 'start_ids.csv'), 'w') as f:
f.write(xs)
else:
first = True
while True:
Expand Down

0 comments on commit 685070f

Please sign in to comment.