Commit: Some fixes
ilya-lavrenov committed Dec 27, 2024
1 parent 87db5ab commit 7c9a6f5
Showing 10 changed files with 79 additions and 18 deletions.
@@ -17,6 +17,7 @@ int main(int argc, char* argv[]) try {
config.max_new_tokens = 20;
config.num_beam_groups = 3;
config.num_beams = 15;
config.diversity_penalty = 1.0f;
config.num_return_sequences = config.num_beams;

// Since the streamer is set, the results will
@@ -19,6 +19,7 @@ def main():
config.max_new_tokens = 20
config.num_beam_groups = 3
config.num_beams = 15
config.diversity_penalty = 1
config.num_return_sequences = config.num_beams

beams = pipe.generate(args.prompts, config)
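For context, a self-contained sketch of the grouped beam search setup both samples now use. The model directory and prompt are placeholders (the real samples take them from argv), and the C++ sample above additionally sets a streamer, as its comment notes:

#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main() {
    // Placeholder model directory; the samples receive it via argv.
    ov::genai::LLMPipeline pipe("path/to/model", "CPU");

    ov::genai::GenerationConfig config;
    config.max_new_tokens = 20;
    config.num_beam_groups = 3;
    config.num_beams = 15;
    // Group beam search needs a non-zero diversity penalty,
    // which is what this commit adds to the samples.
    config.diversity_penalty = 1.0f;
    config.num_return_sequences = config.num_beams;

    auto beams = pipe.generate("Hello, my name is", config);
    std::cout << beams << '\n';
}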
20 changes: 13 additions & 7 deletions src/cpp/src/generation_config.cpp
@@ -33,8 +33,14 @@ GenerationConfig::GenerationConfig(const std::filesystem::path& json_path) {
read_json_param(data, "stop_strings", stop_strings);
// note that include_stop_str_in_output is not present in HF GenerationConfig
read_json_param(data, "include_stop_str_in_output", include_stop_str_in_output);
// note that stop_token_ids is not present in HF GenerationConfig
read_json_param(data, "stop_token_ids", stop_token_ids);
// note that stop_token_ids is not present in HF GenerationConfig, but some generation_config.json define
// multiple eos_token_id (e.g. https://huggingface.co/OpenGVLab/InternVL2-4B/blob/main/generation_config.json)
// so, we need to read them as 'stop_token_ids'
read_json_param(data, "eos_token_id", stop_token_ids);

if (eos_token_id == -1 && !stop_token_ids.empty()) {
eos_token_id = *stop_token_ids.begin();
}

// note that echo is not present in HF GenerationConfig
read_json_param(data, "echo", echo);
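For illustration, a minimal sketch of the resulting behaviour when a model ships a generation_config.json with several eos_token_id values. The file path is a placeholder and the ids mirror the new test added further down, which exercises the same logic:

// Sketch only: assumes model_dir/generation_config.json contains
//   { "eos_token_id": [2, 32000, 32007] }
#include "openvino/genai/generation_config.hpp"
#include <iostream>

int main() {
    ov::genai::GenerationConfig config("model_dir/generation_config.json");

    // All listed ids are collected into stop_token_ids ...
    for (int64_t id : config.stop_token_ids)
        std::cout << id << ' ';                        // 2 32000 32007

    // ... and eos_token_id, not being set separately, falls back
    // to the first (smallest) entry of stop_token_ids.
    std::cout << '\n' << config.eos_token_id << '\n';  // 2
}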
@@ -195,7 +201,7 @@ void GenerationConfig::validate() const {
"ignore_eos is true, in this case either 'max_new_tokens', or 'max_length' should be defined.");

OPENVINO_ASSERT(eos_token_id != -1 || !stop_token_ids.empty() || !stop_strings.empty() || max_new_tokens != SIZE_MAX || max_length != SIZE_MAX,
"Either 'eos_token_id', or 'stop_token_ids', or ''stop_strings'', or 'max_new_tokens', or 'max_length' should be defined.");
"Either 'eos_token_id', or 'stop_token_ids', or 'stop_strings', or 'max_new_tokens', or 'max_length' should be defined.");

OPENVINO_ASSERT(max_new_tokens > 0 || (max_new_tokens == 0 && echo), "'max_new_tokens' must be greater than 0, if `echo` is set, 0 is also accepted");
OPENVINO_ASSERT(min_new_tokens <= max_new_tokens, "min_new_tokens must be less or equal max_new_tokens");
@@ -217,8 +223,8 @@ void GenerationConfig::validate() const {
}

if (is_multinomial()) {
OPENVINO_ASSERT(top_k > 0, "When 'do_sample' is true, top_k must be a strictly positive, but got ", top_k);
OPENVINO_ASSERT(top_p > 0 && top_p <= 1.0f, "When 'do_sample' is true, top_p must be a positive float > 0 and < 1, but got ", top_p);
OPENVINO_ASSERT(top_k >= 0, "When 'do_sample' is true, top_k must be a non-negative, but got ", top_k);
OPENVINO_ASSERT(top_p > 0 && top_p <= 1.0f, "When 'do_sample' is true, top_p must be a positive float > 0.0 and <= 1.0, but got ", top_p);
OPENVINO_ASSERT(temperature > 0, "When 'do_sample' is true, temperature must be a strictly positive float, but got ", temperature);
} else {
// parameters requiring multinomial
@@ -246,8 +252,8 @@ void GenerationConfig::validate() const {
// parameters requiring beam search
OPENVINO_ASSERT(num_beam_groups == 1, "'num_beam_groups' is supported by beam search only and should be 1 otherwise, but got ", num_beam_groups);
OPENVINO_ASSERT(no_repeat_ngram_size == std::numeric_limits<size_t>::max(), "'no_repeat_ngram_size' is supported only by beam search, otherwise should be set to max of size_t, but got ", no_repeat_ngram_size);
OPENVINO_ASSERT(diversity_penalty == 0.0f, "'diversity_penalty' is set to non default value 0.0f, but got ", diversity_penalty, ", which is supported only by beam search sampling, but 'num_beams' is set to 1");
OPENVINO_ASSERT(length_penalty == 1.0f, "'length_penalty' is set to non default value 1.0f, but got ", length_penalty, ", which is supported only by beam search sampling, but 'num_beams' is set to 1");
OPENVINO_ASSERT(diversity_penalty == 0.0f, "'diversity_penalty' is set to ", diversity_penalty, " (default is 0.0f), which is supported only by beam search sampling");
OPENVINO_ASSERT(length_penalty == 1.0f, "'length_penalty' is set to ", length_penalty, " (default is 1.0f), which is supported only by beam search sampling");
}

// assistant generation
12 changes: 12 additions & 0 deletions src/cpp/src/json_utils.hpp
@@ -4,6 +4,9 @@

#pragma once

#include <vector>
#include <set>

#include <nlohmann/json.hpp>

namespace ov {
@@ -40,6 +43,15 @@ void read_json_param(const nlohmann::json& data, const std::string& name, std::v
}
}

template <typename V>
void read_json_param(const nlohmann::json& data, const std::string& name, std::set<V>& param) {
if (data.contains(name) && data[name].is_array()) {
for (const auto elem : data[name]) {
param.insert(elem.get<V>());
}
}
}

} // namespace utils
} // namespace genai
} // namespace ov
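As a usage illustration (not part of the commit), the new std::set overload can be called directly. The include path points at an internal header and the key and value types are assumptions based on how generation_config.cpp uses it:

#include <cstdint>
#include <iostream>
#include <set>
#include <nlohmann/json.hpp>
#include "json_utils.hpp"  // internal header, src/cpp/src/json_utils.hpp

int main() {
    nlohmann::json data = nlohmann::json::parse(R"({"eos_token_id": [2, 32000, 32007]})");

    std::set<int64_t> stop_token_ids;
    // Reads the JSON array into the set; does nothing if the key
    // is missing or is not an array.
    ov::genai::utils::read_json_param(data, "eos_token_id", stop_token_ids);

    for (int64_t id : stop_token_ids)
        std::cout << id << ' ';  // 2 32000 32007
}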
2 changes: 2 additions & 0 deletions src/python/openvino_genai/py_openvino_genai.pyi
@@ -609,6 +609,8 @@ class GenerationConfig:
...
def is_greedy_decoding(self) -> bool:
...
def is_multinomial(self) -> bool:
...
def is_prompt_lookup(self) -> bool:
...
def set_eos_token_id(self, tokenizer_eos_token_id: int) -> None:
1 change: 1 addition & 0 deletions src/python/py_generation_config.cpp
@@ -118,6 +118,7 @@ void init_generation_config(py::module_& m) {
.def("set_eos_token_id", &GenerationConfig::set_eos_token_id, py::arg("tokenizer_eos_token_id"))
.def("is_beam_search", &GenerationConfig::is_beam_search)
.def("is_greedy_decoding", &GenerationConfig::is_greedy_decoding)
.def("is_multinomial", &GenerationConfig::is_multinomial)
.def("is_assisting_generation", &GenerationConfig::is_assisting_generation)
.def("is_prompt_lookup", &GenerationConfig::is_prompt_lookup)
.def("validate", &GenerationConfig::validate)
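The new is_multinomial binding simply exposes the existing C++ helper. A rough sketch of what it reports, assuming (as the validation code above suggests) that is_multinomial() mirrors do_sample:

#include "openvino/genai/generation_config.hpp"
#include <iostream>

int main() {
    ov::genai::GenerationConfig config;
    config.do_sample = true;      // enables multinomial sampling
    config.temperature = 0.7f;
    config.top_k = 50;
    config.top_p = 0.9f;

    std::cout << std::boolalpha
              << config.is_multinomial() << ' '       // true
              << config.is_beam_search() << ' '       // false: num_beams == 1
              << config.is_greedy_decoding() << '\n'; // false
}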
4 changes: 2 additions & 2 deletions tests/python_tests/common.py
@@ -299,7 +299,7 @@ def convert_to_hf(
kwargs['pad_token_id'] = default_generation_config.pad_token_id
kwargs['repetition_penalty'] = generation_config.repetition_penalty

if generation_config.num_beams > 1:
if generation_config.is_beam_search():
# beam search case
kwargs['num_beam_groups'] = generation_config.num_beam_groups
kwargs['num_beams'] = generation_config.num_beams
@@ -309,7 +309,7 @@
kwargs['output_scores'] = True
if generation_config.num_beam_groups > 1:
kwargs['diversity_penalty'] = generation_config.diversity_penalty
elif generation_config.do_sample:
elif generation_config.is_multinomial():
# multinomial
kwargs['temperature'] = generation_config.temperature
kwargs['top_k'] = generation_config.top_k
38 changes: 38 additions & 0 deletions tests/python_tests/test_generation_config.py
@@ -2,6 +2,9 @@
# SPDX-License-Identifier: Apache-2.0

from openvino_genai import GenerationConfig
from typing import Tuple, List
import json
import os
import pytest

configs = [
@@ -94,3 +97,38 @@ def test_invalid_generation_configs_throws(generation_config):
config = GenerationConfig()
with pytest.raises(RuntimeError):
config.update_generation_config(**generation_config)


def load_genai_generation_config_from_file(configs: List[Tuple], temp_path):
for json_file in temp_path.glob("*.json"):
json_file.unlink()

for config_json, config_name in configs:
with (temp_path / config_name).open('w') as f:
json.dump(config_json, f)

ov_generation_config = GenerationConfig(temp_path / "generation_config.json")

for _, config_name in configs:
os.remove(temp_path / config_name)

return ov_generation_config

@pytest.mark.precommit
@pytest.mark.nightly
def test_multiple_eos_are_read_as_stop_token_ids(tmp_path):
generation_config_json = {
"eos_token_id": [
2,
32000,
32007
]
}
configs = [
(generation_config_json, "generation_config.json"),
]

generation_config = load_genai_generation_config_from_file(configs, tmp_path)

assert generation_config.eos_token_id == 2
assert generation_config.stop_token_ids == { 2, 32000, 32007 }
13 changes: 9 additions & 4 deletions tests/python_tests/test_tokenizer.py
@@ -1,6 +1,7 @@
# Copyright (C) 2023-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os
import pytest
import numpy as np
from transformers import AutoTokenizer
@@ -17,15 +18,19 @@


def load_genai_tokenizer_with_configs(configs: List[Tuple], temp_path):
# load Tokenizer where all configs are cleared.
# remove existing jsons from previous tests
for json_file in temp_path.glob("*.json"):
json_file.unlink()

for config_json, config_name in configs:
with (temp_path / config_name).open('w') as f:
json.dump(config_json, f)
return openvino_genai.Tokenizer(temp_path)

ov_tokenizer = openvino_genai.Tokenizer(temp_path)

for _, config_name in configs:
os.remove(temp_path / config_name)

return ov_tokenizer


def get_chat_templates():
@@ -181,7 +186,7 @@ def test_apply_chat_template(model_tmp_path, chat_config: Tuple[str, Dict]):
@pytest.mark.nightly
def test_set_chat_template():
model_descr = get_chat_models_list()[0]
model_id, path, hf_tokenizer, model_opt, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))
model_id, path, hf_tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))

prompt = "how are you?"
dummy_conversation = [
@@ -123,11 +123,6 @@ Dataset filtered_dataset(const std::string& models_path, const std::string& data
ov::genai::GenerationConfig greedy_search = ov::genai::greedy();
greedy_search.max_new_tokens = std::min(max_output_len, output_len);
greedy_search.ignore_eos = true;
greedy_search.repetition_penalty = 1.0;
greedy_search.frequency_penalty = 0.0;
greedy_search.presence_penalty = 0.0;
greedy_search.diversity_penalty = 0.0;
greedy_search.length_penalty = 0.0;

dataset.push_data(human_question, greedy_search);
dataset.push_lens(input_len, output_len);