update customized tokenizer
openvino-dev-samples committed Jan 13, 2025
1 parent 84f7228 commit bddb5cb
Showing 1 changed file with 18 additions and 13 deletions.
31 changes: 18 additions & 13 deletions libs/community/langchain_community/llms/openvino.py
@@ -121,7 +121,7 @@ def get_stop_flag(self) -> bool:
         """
         return False

-    def put_word(self, word: str) -> None:
+    def put_word(self, word: Any) -> None:
         """
         Puts a word into the text queue.
@@ -208,7 +208,7 @@ def put(self, token_id: int) -> bool:
         else:
             pipe = openvino_genai.LLMPipeline(model_path, device)

-        config = openvino_genai.GenerationConfig()
+        config = pipe.get_generation_config()
         if tokenizer is None:
             tokenizer = pipe.get_tokenizer()
         streamer = ChunkStreamer(tokenizer)
@@ -231,7 +231,7 @@ def _call(
         if stop is not None:
             self.config.stop_strings = set(stop)
         try:
-            import openvino
+            import openvino as ov
             import openvino_genai

         except ImportError:
@@ -240,10 +240,13 @@
                 "Please install it with `pip install openvino-genai`."
             )
         if not isinstance(self.tokenizer, openvino_genai.Tokenizer):
-            prompt = openvino.Tensor(
-                self.tokenizer.encode(
-                    prompt, add_special_tokens=False, return_tensors="np"
-                ),
+            tokens = self.tokenizer(
+                prompt, add_special_tokens=False, return_tensors="np"
+            )
+            input_ids = tokens["input_ids"]
+            attention_mask = tokens["attention_mask"]
+            prompt = openvino_genai.TokenizedInputs(
+                ov.Tensor(input_ids), ov.Tensor(attention_mask)
             )
         output = self.pipe.generate(prompt, self.config)
         return output
@@ -261,7 +264,7 @@ def _stream(
         if stop is not None:
             self.config.stop_strings = set(stop)
         try:
-            import openvino
+            import openvino as ov
             import openvino_genai

         except ImportError:
@@ -270,12 +273,14 @@
                 "Please install it with `pip install openvino-genai`."
             )
         if not isinstance(self.tokenizer, openvino_genai.Tokenizer):
-            prompt = openvino.Tensor(
-                self.tokenizer.encode(
-                    prompt, add_special_tokens=False, return_tensors="np"
-                ),
+            tokens = self.tokenizer(
+                prompt, add_special_tokens=False, return_tensors="np"
+            )
+            input_ids = tokens["input_ids"]
+            attention_mask = tokens["attention_mask"]
+            prompt = openvino_genai.TokenizedInputs(
+                ov.Tensor(input_ids), ov.Tensor(attention_mask)
             )

         stream_complete = Event()

         def generate_and_signal_complete() -> None:
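For context, the sketch below walks the same tokenization path the changed hunks use when a customized (non-openvino_genai) tokenizer is supplied: the prompt is tokenized to NumPy arrays, wrapped into openvino_genai.TokenizedInputs via ov.Tensor, and handed to LLMPipeline.generate. This is a minimal sketch only; the Hugging Face AutoTokenizer, the model directory name, and the max_new_tokens setting are illustrative assumptions, not part of this commit.

import openvino as ov
import openvino_genai
from transformers import AutoTokenizer  # assumption: an HF tokenizer plays the "customized" tokenizer role

model_path = "TinyLlama-1.1B-Chat-v1.0-int4-ov"  # hypothetical OpenVINO-exported model directory
pipe = openvino_genai.LLMPipeline(model_path, "CPU")
config = pipe.get_generation_config()
config.max_new_tokens = 64  # assumption: cap generation length for the example

# Any tokenizer that is not an openvino_genai.Tokenizer takes the custom path.
hf_tokenizer = AutoTokenizer.from_pretrained(model_path)

prompt = "What is OpenVINO?"
tokens = hf_tokenizer(prompt, add_special_tokens=False, return_tensors="np")
inputs = openvino_genai.TokenizedInputs(
    ov.Tensor(tokens["input_ids"]), ov.Tensor(tokens["attention_mask"])
)

# generate() accepts the pre-tokenized inputs in place of a raw string prompt.
result = pipe.generate(inputs, config)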
