Skip to content

Commit

Permalink
make skip_tokens an input for VocabDecode (parametrize detokenization…
Browse files Browse the repository at this point in the history
…/decoding) (#325)

* add VocabDecode with input skip_tokens

* always keep skip_tokens const
  • Loading branch information
pavel-esir authored Nov 25, 2024
1 parent c6af44c commit b961c74
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 12 deletions.
10 changes: 4 additions & 6 deletions python/openvino_tokenizers/hf_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,8 +375,8 @@ def decoding(self) -> None:
if self.tokenizer_json["decoder"] is None or self.tokenizer_json["model"]["type"] == "WordPiece":
return

skip_tokens = parse_special_tokens(self.original_tokenizer) if self.skip_special_tokens else {}
self.pipeline.add_steps(VocabDecoderStep(skip_tokens=list(skip_tokens)))
skip_tokens = parse_special_tokens(self.original_tokenizer)
self.pipeline.add_steps(VocabDecoderStep(skip_tokens=list(skip_tokens), do_skip_tokens=self.skip_special_tokens))

if self.tokenizer_json["decoder"]["type"] == "Sequence":
for decoder_dict in self.tokenizer_json["decoder"]["decoders"]:
Expand Down Expand Up @@ -1013,9 +1013,7 @@ def convert_tiktoken_model_tokenizer(
split_pattern = encoding._pat_str

pipeline = TokenizerPipeline()
skip_tokens = []
if params.skip_special_tokens:
skip_tokens = list(parse_special_tokens(hf_tokenizer))
skip_tokens = list(parse_special_tokens(hf_tokenizer))

add_prefix_steps = []
if hasattr(hf_tokenizer, "get_prefix_tokens") and params.add_special_tokens:
Expand Down Expand Up @@ -1048,7 +1046,7 @@ def convert_tiktoken_model_tokenizer(

pipeline.add_steps(
[
VocabDecoderStep(vocab=decoder_vocab, skip_tokens=skip_tokens),
VocabDecoderStep(vocab=decoder_vocab, skip_tokens=skip_tokens, do_skip_tokens=params.skip_special_tokens),
FuseStep(),
]
)
Expand Down
16 changes: 14 additions & 2 deletions python/openvino_tokenizers/tokenizer_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

import numpy as np
from openvino.runtime import Model, Output, PartialShape, Type, op
from openvino.runtime import Model, Output, PartialShape, Type, op, Shape
from openvino.runtime import opset12 as opset
from openvino.runtime.exceptions import OVTypeError, UserInputError
from openvino.runtime.utils.types import as_node, make_constant_node
Expand Down Expand Up @@ -1008,6 +1008,7 @@ class DecodingStep(BasePipelineStep):
class VocabDecoderStep(DecodingStep):
vocab: Optional[List[str]] = None
skip_tokens: Optional[List[int]] = None
do_skip_tokens: Optional[bool] = True

def finalize(self) -> None:
pipeline = self.get_pipeline()
Expand All @@ -1025,7 +1026,18 @@ def get_ov_subgraph(self, input_nodes: List[Output]) -> List[Output]:
else:
vocab_outputs = self.create_string_constant_node(self.vocab).outputs()
input_nodes.extend(vocab_outputs)
return _get_factory().create("VocabDecoder", input_nodes, {"skip_tokens": self.skip_tokens}).outputs()

# Always add the constant with skip tokens, even if do_skip_tokens=False, so that skipping can be switched on/off at runtime.
# The slice covers all skip tokens if the flag is true; otherwise its stop is 0, which yields an empty tensor.
stop_const = op.Constant(Type.i32, Shape([1]), [np.iinfo(np.int32).max if self.do_skip_tokens else 0])

zero_const = op.Constant(Type.i32, Shape([1]), [0])
one_const = op.Constant(Type.i32, Shape([1]), [1])
skip_tokens_const = op.Constant(Type.i32, Shape([len(self.skip_tokens)]), self.skip_tokens)
sliced_skips = opset.slice(skip_tokens_const, zero_const, stop_const, one_const).outputs()
input_nodes.extend(sliced_skips)

return _get_factory().create("VocabDecoder", input_nodes).outputs()


@dataclass
Expand Down
14 changes: 11 additions & 3 deletions src/vocab_decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,20 @@ bool VocabDecoder::evaluate(ov::TensorVector& outputs, const ov::TensorVector& i
auto seq_len = inputs[0].get_shape()[1];
auto input_data = inputs[0].data<const int32_t>();

auto vocab_size = inputs[1].get_size();
auto vocab_begins = inputs[1].data<const int32_t>();
auto vocab_ends = inputs[2].data<const int32_t>();
auto vocab_chars = inputs[3].data<const uint8_t>();
auto vocab_size = inputs[1].get_size();

OPENVINO_ASSERT(inputs.size() == 4, "Too few inputs passed to VocabDecoder, it means it is not converted properly or it is not used in the supported pattern");
OPENVINO_ASSERT(inputs.size() == 4 || inputs.size() == 5, "Too few inputs passed to VocabDecoder, it means it is not converted properly or it is not used in the supported pattern");

// Use skip tokens from input if specified, otherwise use the attribute.
std::vector<int> skip_tokens;
if (inputs.size() == 5) {
skip_tokens = std::vector<int>(inputs[4].data<int32_t>(), inputs[4].data<int32_t>() + inputs[4].get_shape()[0]);
} else {
skip_tokens = m_skip_tokens;
}

// Set output shapes
outputs[0].set_shape({batch_size});
Expand Down Expand Up @@ -61,7 +69,7 @@ bool VocabDecoder::evaluate(ov::TensorVector& outputs, const ov::TensorVector& i
new_begins[seq] = buffer.size();
if (
token_id < vocab_size
&& std::find(m_skip_tokens.begin(), m_skip_tokens.end(), token_id) == m_skip_tokens.end()
&& std::find(skip_tokens.begin(), skip_tokens.end(), token_id) == skip_tokens.end()
) {
buffer.insert(
buffer.end(),
Expand Down
15 changes: 14 additions & 1 deletion src/vocab_decoder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,24 @@
#include <vector>
#include <openvino/op/op.hpp>

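/**
* @class VocabDecoder
* @brief The VocabDecoder class is an OpenVINO operation that decodes vocabulary tokens.
*
* This class inherits from the ov::op::Op base class and provides functionality to decode
* vocabulary tokens while skipping specified tokens.
*
* Inputs, in order:
* @param input_data   input 0: token ids to decode (int32).
* @param vocab_begins input 1: int32 tensor; its element count is used as the vocabulary size.
* @param vocab_ends   input 2: int32 tensor.
* @param vocab_chars  input 3: uint8 tensor.
* @param skip_tokens  input 4 (optional, int32): token ids to skip while decoding.
*                     When this input is provided, it has priority over the attribute.
*
* @note The skip_tokens attribute is used only when the skip_tokens input is not provided.
*/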
class VocabDecoder : public ov::op::Op {
public:
OPENVINO_OP("VocabDecoder");

VocabDecoder () = default;
VocabDecoder () = default;
VocabDecoder(
const ov::OutputVector& arguments,
std::vector<int> skip_tokens
Expand Down

0 comments on commit b961c74

Please sign in to comment.