diff --git a/automodel.py b/automodel.py index 7108c3a1aa..af4e13b07f 100644 --- a/automodel.py +++ b/automodel.py @@ -45,9 +45,18 @@ def completion_tensors( temperature: float, top_p: float, ): + self.model.eval() # Not essential, but just in case. + inputs = self.tokenizer( - prompts, padding=True, return_tensors="pt", return_token_type_ids=False + prompts, + padding=True, + return_tensors="pt", + return_token_type_ids=False, + truncation=True, + max_length=max_length - 1, ).to("cuda") + + with torch.no_grad(): output = self.model.generate( **inputs,