diff --git a/python/openvino_tokenizers/hf_parser.py b/python/openvino_tokenizers/hf_parser.py
index 75d357d7..1476c3a6 100644
--- a/python/openvino_tokenizers/hf_parser.py
+++ b/python/openvino_tokenizers/hf_parser.py
@@ -375,8 +375,8 @@ def decoding(self) -> None:
         if self.tokenizer_json["decoder"] is None or self.tokenizer_json["model"]["type"] == "WordPiece":
             return
 
-        skip_tokens = parse_special_tokens(self.original_tokenizer) if self.skip_special_tokens else {}
-        self.pipeline.add_steps(VocabDecoderStep(skip_tokens=list(skip_tokens)))
+        skip_tokens = parse_special_tokens(self.original_tokenizer)
+        self.pipeline.add_steps(VocabDecoderStep(skip_tokens=list(skip_tokens), do_skip_tokens=self.skip_special_tokens))
 
         if self.tokenizer_json["decoder"]["type"] == "Sequence":
             for decoder_dict in self.tokenizer_json["decoder"]["decoders"]:
@@ -1013,9 +1013,7 @@ def convert_tiktoken_model_tokenizer(
     split_pattern = encoding._pat_str
 
     pipeline = TokenizerPipeline()
-    skip_tokens = []
-    if params.skip_special_tokens:
-        skip_tokens = list(parse_special_tokens(hf_tokenizer))
+    skip_tokens = list(parse_special_tokens(hf_tokenizer))
 
     add_prefix_steps = []
     if hasattr(hf_tokenizer, "get_prefix_tokens") and params.add_special_tokens:
@@ -1048,7 +1046,7 @@ def convert_tiktoken_model_tokenizer(
 
     pipeline.add_steps(
         [
-            VocabDecoderStep(vocab=decoder_vocab, skip_tokens=skip_tokens),
+            VocabDecoderStep(vocab=decoder_vocab, skip_tokens=skip_tokens, do_skip_tokens=params.skip_special_tokens),
             FuseStep(),
         ]
     )
diff --git a/python/openvino_tokenizers/tokenizer_pipeline.py b/python/openvino_tokenizers/tokenizer_pipeline.py
index 52417a3b..734a04d5 100644
--- a/python/openvino_tokenizers/tokenizer_pipeline.py
+++ b/python/openvino_tokenizers/tokenizer_pipeline.py
@@ -14,7 +14,7 @@
 from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
 
 import numpy as np
-from openvino.runtime import Model, Output, PartialShape, Type, op
+from openvino.runtime import Model, Output, PartialShape, Type, op, Shape
 from openvino.runtime import opset12 as opset
 from openvino.runtime.exceptions import OVTypeError, UserInputError
 from openvino.runtime.utils.types import as_node, make_constant_node
@@ -1008,6 +1008,7 @@ class DecodingStep(BasePipelineStep):
 class VocabDecoderStep(DecodingStep):
     vocab: Optional[List[str]] = None
     skip_tokens: Optional[List[int]] = None
+    do_skip_tokens: Optional[bool] = True
 
     def finalize(self) -> None:
         pipeline = self.get_pipeline()
@@ -1025,7 +1026,18 @@ def get_ov_subgraph(self, input_nodes: List[Output]) -> List[Output]:
         else:
             vocab_outputs = self.create_string_constant_node(self.vocab).outputs()
         input_nodes.extend(vocab_outputs)
-        return _get_factory().create("VocabDecoder", input_nodes, {"skip_tokens": self.skip_tokens}).outputs()
+
+        # Put constant with skip tokens even if do_skip_tokens=False, so that it can be switched on/off at runtime.
+        # Slice through all skip tokens if flag is true, else slice to get an empty tensor.
+        stop_const = op.Constant(Type.i32, Shape([1]), [np.iinfo(np.int32).max if self.do_skip_tokens else 0])
+
+        zero_const = op.Constant(Type.i32, Shape([1]), [0])
+        one_const = op.Constant(Type.i32, Shape([1]), [1])
+        skip_tokens_const = op.Constant(Type.i32, Shape([len(self.skip_tokens)]), self.skip_tokens)
+        sliced_skips = opset.slice(skip_tokens_const, zero_const, stop_const, one_const).outputs()
+        input_nodes.extend(sliced_skips)
+
+        return _get_factory().create("VocabDecoder", input_nodes).outputs()
 
 
 @dataclass
diff --git a/src/vocab_decoder.cpp b/src/vocab_decoder.cpp
index 35e9dfd2..f7eac356 100644
--- a/src/vocab_decoder.cpp
+++ b/src/vocab_decoder.cpp
@@ -25,12 +25,20 @@ bool VocabDecoder::evaluate(ov::TensorVector& outputs, const ov::TensorVector& i
     auto seq_len = inputs[0].get_shape()[1];
     auto input_data = inputs[0].data<const int32_t>();
 
+    auto vocab_size = inputs[1].get_size();
     auto vocab_begins = inputs[1].data<const int32_t>();
     auto vocab_ends = inputs[2].data<const int32_t>();
     auto vocab_chars = inputs[3].data<const uint8_t>();
-    auto vocab_size = inputs[1].get_size();
 
-    OPENVINO_ASSERT(inputs.size() == 4, "Too few inputs passed to VocabDecoder, it means it is not converted properly or it is not used in the supported pattern");
+    OPENVINO_ASSERT(inputs.size() == 4 || inputs.size() == 5, "Too few inputs passed to VocabDecoder, it means it is not converted properly or it is not used in the supported pattern");
+
+    // Use skip tokens from input if specified, otherwise use the attribute.
+    std::vector<int> skip_tokens;
+    if (inputs.size() == 5) {
+        skip_tokens = std::vector<int>(inputs[4].data<int32_t>(), inputs[4].data<int32_t>() + inputs[4].get_shape()[0]);
+    } else {
+        skip_tokens = m_skip_tokens;
+    }
 
     // Set output shapes
     outputs[0].set_shape({batch_size});
@@ -61,7 +69,7 @@ bool VocabDecoder::evaluate(ov::TensorVector& outputs, const ov::TensorVector& i
             new_begins[seq] = buffer.size();
             if (
                 token_id < vocab_size
-                && std::find(m_skip_tokens.begin(), m_skip_tokens.end(), token_id) == m_skip_tokens.end()
+                && std::find(skip_tokens.begin(), skip_tokens.end(), token_id) == skip_tokens.end()
             ) {
                 buffer.insert(
                     buffer.end(),
diff --git a/src/vocab_decoder.hpp b/src/vocab_decoder.hpp
index bf9ada0c..f8750f4b 100644
--- a/src/vocab_decoder.hpp
+++ b/src/vocab_decoder.hpp
@@ -6,11 +6,24 @@
 #include <vector>
 #include <openvino/op/op.hpp>
 
+/**
+ * @class VocabDecoder
+ * @brief The VocabDecoder class is an OpenVINO operation that decodes vocabulary tokens.
+ *
+ * This class inherits from the ov::op::Op base class and provides functionality to decode
+ * vocabulary tokens while skipping specified tokens.
+ * @param input_data
+ * @param vocab_begins
+ * @param vocab_ends
+ * @param vocab_chars
+ * @param skip_tokens input has priority over the attribute.
+ * @param skip_tokens attribute is used only when skip_tokens input is not provided.
+ */
 class VocabDecoder : public ov::op::Op {
 public:
     OPENVINO_OP("VocabDecoder");
 
-    VocabDecoder () = default;
+    VocabDecoder () = default;
     VocabDecoder(
         const ov::OutputVector& arguments,
         std::vector<int> skip_tokens
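
Illustrative note (not part of the patch): the slice trick in get_ov_subgraph above keeps the skip-token constant in the graph even when skipping is disabled, so the behaviour can later be toggled without reconverting the tokenizer. A minimal NumPy sketch of the same start/stop/step semantics, assuming int32 token ids (the example values are hypothetical):

    import numpy as np

    skip_tokens = np.array([0, 1, 2], dtype=np.int32)  # hypothetical special-token ids

    # do_skip_tokens=True  -> stop = INT32_MAX, the slice keeps all skip tokens
    stop = np.iinfo(np.int32).max
    print(skip_tokens[0:stop:1])  # [0 1 2]; VocabDecoder drops these ids from the output

    # do_skip_tokens=False -> stop = 0, the slice yields an empty tensor
    stop = 0
    print(skip_tokens[0:stop:1])  # []; VocabDecoder keeps every token

Like NumPy, the OpenVINO Slice operation clamps an out-of-range stop index to the dimension size, which is why INT32_MAX selects the whole skip-token constant.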