diff --git a/libs/community/langchain_community/llms/openvino.py b/libs/community/langchain_community/llms/openvino.py index 0ef573869df74..f07f11d29f57f 100644 --- a/libs/community/langchain_community/llms/openvino.py +++ b/libs/community/langchain_community/llms/openvino.py @@ -121,7 +121,7 @@ def get_stop_flag(self) -> bool: """ return False - def put_word(self, word: str) -> None: + def put_word(self, word: Any) -> None: """ Puts a word into the text queue. @@ -208,7 +208,7 @@ def put(self, token_id: int) -> bool: else: pipe = openvino_genai.LLMPipeline(model_path, device) - config = openvino_genai.GenerationConfig() + config = pipe.get_generation_config() if tokenizer is None: tokenizer = pipe.get_tokenizer() streamer = ChunkStreamer(tokenizer) @@ -231,7 +231,7 @@ def _call( if stop is not None: self.config.stop_strings = set(stop) try: - import openvino + import openvino as ov import openvino_genai except ImportError: @@ -240,10 +240,13 @@ def _call( "Please install it with `pip install openvino-genai`." ) if not isinstance(self.tokenizer, openvino_genai.Tokenizer): - prompt = openvino.Tensor( - self.tokenizer.encode( - prompt, add_special_tokens=False, return_tensors="np" - ), + tokens = self.tokenizer( + prompt, add_special_tokens=False, return_tensors="np" + ) + input_ids = tokens["input_ids"] + attention_mask = tokens["attention_mask"] + prompt = openvino_genai.TokenizedInputs( + ov.Tensor(input_ids), ov.Tensor(attention_mask) ) output = self.pipe.generate(prompt, self.config) return output @@ -261,7 +264,7 @@ def _stream( if stop is not None: self.config.stop_strings = set(stop) try: - import openvino + import openvino as ov import openvino_genai except ImportError: @@ -270,12 +273,14 @@ def _stream( "Please install it with `pip install openvino-genai`." ) if not isinstance(self.tokenizer, openvino_genai.Tokenizer): - prompt = openvino.Tensor( - self.tokenizer.encode( - prompt, add_special_tokens=False, return_tensors="np" - ), + tokens = self.tokenizer( + prompt, add_special_tokens=False, return_tensors="np" + ) + input_ids = tokens["input_ids"] + attention_mask = tokens["attention_mask"] + prompt = openvino_genai.TokenizedInputs( + ov.Tensor(input_ids), ov.Tensor(attention_mask) ) - stream_complete = Event() def generate_and_signal_complete() -> None: