From 806f833800ea4fdab5da8bba4b6db347382cd8b6 Mon Sep 17 00:00:00 2001
From: murrellb
Date: Thu, 28 Nov 2024 22:48:05 +0100
Subject: [PATCH] Tweaking readme and docstrings because the normal samplers
 now run on the GPU

---
 README.md    | 16 ----------------
 src/utils.jl |  2 ++
 2 files changed, 2 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 9628add..121e4f9 100644
--- a/README.md
+++ b/README.md
@@ -155,22 +155,6 @@ generate(model, prompt,
         device = gpu); #Note the device keyword
 ```
 
-If you're using one of the trickier samplers, some CPU operations are needed for sampling. So you need to pass `device = cpu` to the sampler, while passing `device = gpu` to the `generate` function:
-
-```julia
-#Put the model on the GPU
-model = gpu(model)
-
-prompt = smollm2_assistant_prompt(tkn,"Tell me the two worst things about Python.")
-generate(model, prompt,
-        max_new_tokens=500,
-        tokenizer_for_printing=tkn,
-        end_token = encode(tkn, "<|im_end|>")[end],
-        sampler = top_nσ_sampler(; device = cpu), #cpu for the sampler
-        device = gpu, #gpu for generate
-        );
-```
-
 And if you're training, the data needs to be on the GPU:
 
 ```julia
diff --git a/src/utils.jl b/src/utils.jl
index cd1df4b..3942149 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -191,6 +191,8 @@ eos = encode(tkn, "<|im_end|>")[end]
 prompt = smollm2_instruct_prompt(tkn, "You are an expert in Statistics and Probability Theory who answers questions in as few words as possible.",question)
 generate(model, prompt, max_new_tokens=100, tokenizer_for_printing=tkn, end_token = eos, sampler = structured_choice(choices, vocab, eos));
 ```
+
+If you want to run the model on the GPU, then you need to pass `device = gpu` to the `generate` function, and `device = cpu` to the `structured_choice` function.
 """
 function structured_choice(choices::Vector{String}, vocab::Vector{String}, end_token::Int; sampler = logits -> argmax_sampler(logits), device = identity)
     remaining_choices = copy(choices)
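
Not part of the patch: a minimal sketch of the call that the new docstring line describes, reusing the names from the docstring example in the hunk above (`model`, `prompt`, `tkn`, `eos`, `choices`, `vocab`) and assuming `gpu`/`cpu` are the usual Flux device functions; that `structured_choice` takes its `device` as a keyword is read off the signature shown in the hunk.

```julia
# Hypothetical post-patch usage: generation stays on the GPU, while the
# structured_choice sampler does its constrained-decoding bookkeeping on the CPU.
model = gpu(model)

generate(model, prompt,
        max_new_tokens=100,
        tokenizer_for_printing=tkn,
        end_token = eos,
        sampler = structured_choice(choices, vocab, eos; device = cpu), # cpu for the sampler
        device = gpu); # gpu for generate
```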