diff --git a/README.md b/README.md
index da84c972a..794f1352b 100644
--- a/README.md
+++ b/README.md
@@ -53,12 +53,13 @@ You can use our Hugging Face integration to run inference on the OLMo Transforme
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
-
 olmo = AutoModelForCausalLM.from_pretrained("allenai/OLMo-2-1124-7B")
 tokenizer = AutoTokenizer.from_pretrained("allenai/OLMo-2-1124-7B")
-
 message = ["Language modeling is "]
 inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False)
+# optional: run on CUDA by uncommenting the two lines below
+# inputs = {k: v.to('cuda') for k,v in inputs.items()}
+# olmo = olmo.to('cuda')
 response = olmo.generate(**inputs, max_new_tokens=100, do_sample=True, top_k=50, top_p=0.95)
 print(tokenizer.batch_decode(response, skip_special_tokens=True)[0])
 ```
 
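
For reference, here is a minimal sketch of the resulting snippet with the optional CUDA lines enabled behind a `torch.cuda.is_available()` guard. The guard and the `import torch` line are illustrative additions, not part of the patch above; everything else mirrors the patched README.

```python
# Sketch of the patched README snippet with the optional CUDA lines applied.
# The torch.cuda.is_available() guard is an assumption added for illustration;
# the patch itself leaves the two .to('cuda') lines commented out.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

olmo = AutoModelForCausalLM.from_pretrained("allenai/OLMo-2-1124-7B")
tokenizer = AutoTokenizer.from_pretrained("allenai/OLMo-2-1124-7B")

message = ["Language modeling is "]
inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False)

if torch.cuda.is_available():
    # Move both the model and every input tensor to the GPU so they
    # live on the same device during generation.
    olmo = olmo.to('cuda')
    inputs = {k: v.to('cuda') for k, v in inputs.items()}

response = olmo.generate(**inputs, max_new_tokens=100, do_sample=True, top_k=50, top_p=0.95)
print(tokenizer.batch_decode(response, skip_special_tokens=True)[0])
```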