Commit: Some fixes
ilya-lavrenov committed Dec 27, 2024
1 parent 87db5ab commit 7c9a6f5
Showing 10 changed files with 79 additions and 18 deletions.
@@ -17,6 +17,7 @@ int main(int argc, char* argv[]) try {
config.max_new_tokens = 20;
config.num_beam_groups = 3;
config.num_beams = 15;
config.diversity_penalty = 1.0f;
config.num_return_sequences = config.num_beams;

// Since the streamer is set, the results will
@@ -19,6 +19,7 @@ def main():
config.max_new_tokens = 20
config.num_beam_groups = 3
config.num_beams = 15
config.diversity_penalty = 1
config.num_return_sequences = config.num_beams

beams = pipe.generate(args.prompts, config)
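For context, a self-contained sketch of the grouped beam search setup both samples now use. The model directory and prompt are placeholders (the real samples take them from argv), and the C++ sample above additionally sets a streamer, as its comment notes:

#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main() {
    // Placeholder model directory; the samples receive it via argv.
    ov::genai::LLMPipeline pipe("path/to/model", "CPU");

    ov::genai::GenerationConfig config;
    config.max_new_tokens = 20;
    config.num_beam_groups = 3;
    config.num_beams = 15;
    // Group beam search needs a non-zero diversity penalty,
    // which is what this commit adds to the samples.
    config.diversity_penalty = 1.0f;
    config.num_return_sequences = config.num_beams;

    auto beams = pipe.generate("Hello, my name is", config);
    std::cout << beams << '\n';
}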
20 changes: 13 additions & 7 deletions src/cpp/src/generation_config.cpp
@@ -33,8 +33,14 @@ GenerationConfig::GenerationConfig(const std::filesystem::path& json_path) {
read_json_param(data, "stop_strings", stop_strings);
// note that include_stop_str_in_output is not present in HF GenerationConfig
read_json_param(data, "include_stop_str_in_output", include_stop_str_in_output);
// note that stop_token_ids is not present in HF GenerationConfig
read_json_param(data, "stop_token_ids", stop_token_ids);
// note that stop_token_ids is not present in HF GenerationConfig, but some generation_config.json define
// multiple eos_token_id (e.g. https://huggingface.co/OpenGVLab/InternVL2-4B/blob/main/generation_config.json)
// so, we need to read them as 'stop_token_ids'
read_json_param(data, "eos_token_id", stop_token_ids);

if (eos_token_id == -1 && !stop_token_ids.empty()) {
eos_token_id = *stop_token_ids.begin();
}

// note that echo is not present in HF GenerationConfig
read_json_param(data, "echo", echo);
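For illustration, a minimal sketch of the resulting behaviour when a model ships a generation_config.json with several eos_token_id values. The file path is a placeholder and the ids mirror the new test added further down, which exercises the same logic:

// Sketch only: assumes model_dir/generation_config.json contains
//   { "eos_token_id": [2, 32000, 32007] }
#include "openvino/genai/generation_config.hpp"
#include <iostream>

int main() {
    ov::genai::GenerationConfig config("model_dir/generation_config.json");

    // All listed ids are collected into stop_token_ids ...
    for (int64_t id : config.stop_token_ids)
        std::cout << id << ' ';                        // 2 32000 32007

    // ... and eos_token_id, not being set separately, falls back
    // to the first (smallest) entry of stop_token_ids.
    std::cout << '\n' << config.eos_token_id << '\n';  // 2
}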
@@ -195,7 +201,7 @@ void GenerationConfig::validate() const {
"ignore_eos is true, in this case either 'max_new_tokens', or 'max_length' should be defined.");

OPENVINO_ASSERT(eos_token_id != -1 || !stop_token_ids.empty() || !stop_strings.empty() || max_new_tokens != SIZE_MAX || max_length != SIZE_MAX,
"Either 'eos_token_id', or 'stop_token_ids', or ''stop_strings'', or 'max_new_tokens', or 'max_length' should be defined.");
"Either 'eos_token_id', or 'stop_token_ids', or 'stop_strings', or 'max_new_tokens', or 'max_length' should be defined.");

OPENVINO_ASSERT(max_new_tokens > 0 || (max_new_tokens == 0 && echo), "'max_new_tokens' must be greater than 0, if `echo` is set, 0 is also accepted");
OPENVINO_ASSERT(min_new_tokens <= max_new_tokens, "min_new_tokens must be less or equal max_new_tokens");
@@ -217,8 +223,8 @@ void GenerationConfig::validate() const {
}

if (is_multinomial()) {
OPENVINO_ASSERT(top_k > 0, "When 'do_sample' is true, top_k must be a strictly positive, but got ", top_k);
OPENVINO_ASSERT(top_p > 0 && top_p <= 1.0f, "When 'do_sample' is true, top_p must be a positive float > 0 and < 1, but got ", top_p);
OPENVINO_ASSERT(top_k >= 0, "When 'do_sample' is true, top_k must be a non-negative, but got ", top_k);
OPENVINO_ASSERT(top_p > 0 && top_p <= 1.0f, "When 'do_sample' is true, top_p must be a positive float > 0.0 and <= 1.0, but got ", top_p);
OPENVINO_ASSERT(temperature > 0, "When 'do_sample' is true, temperature must be a strictly positive float, but got ", temperature);
} else {
// parameters requiring multinomial
@@ -246,8 +252,8 @@ void GenerationConfig::validate() const {
// parameters requiring beam search
OPENVINO_ASSERT(num_beam_groups == 1, "'num_beam_groups' is supported by beam search only and should be 1 otherwise, but got ", num_beam_groups);
OPENVINO_ASSERT(no_repeat_ngram_size == std::numeric_limits<size_t>::max(), "'no_repeat_ngram_size' is supported only by beam search, otherwise should be set to max of size_t, but got ", no_repeat_ngram_size);
OPENVINO_ASSERT(diversity_penalty == 0.0f, "'diversity_penalty' is set to non default value 0.0f, but got ", diversity_penalty, ", which is supported only by beam search sampling, but 'num_beams' is set to 1");
OPENVINO_ASSERT(length_penalty == 1.0f, "'length_penalty' is set to non default value 1.0f, but got ", length_penalty, ", which is supported only by beam search sampling, but 'num_beams' is set to 1");
OPENVINO_ASSERT(diversity_penalty == 0.0f, "'diversity_penalty' is set to ", diversity_penalty, " (default is 0.0f), which is supported only by beam search sampling");
OPENVINO_ASSERT(length_penalty == 1.0f, "'length_penalty' is set to ", length_penalty, " (default is 1.0f), which is supported only by beam search sampling");
}

// assistant generation
12 changes: 12 additions & 0 deletions src/cpp/src/json_utils.hpp
@@ -4,6 +4,9 @@

#pragma once

#include <vector>
#include <set>

#include <nlohmann/json.hpp>

namespace ov {
@@ -40,6 +43,15 @@ void read_json_param(const nlohmann::json& data, const std::string& name, std::v
}
}

template <typename V>
void read_json_param(const nlohmann::json& data, const std::string& name, std::set<V>& param) {
if (data.contains(name) && data[name].is_array()) {
for (const auto elem : data[name]) {
param.insert(elem.get<V>());
}
}
}

} // namespace utils
} // namespace genai
} // namespace ov
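As a usage illustration (not part of the commit), the new std::set overload can be called directly. The include path points at an internal header and the key and value types are assumptions based on how generation_config.cpp uses it:

#include <cstdint>
#include <iostream>
#include <set>
#include <nlohmann/json.hpp>
#include "json_utils.hpp"  // internal header, src/cpp/src/json_utils.hpp

int main() {
    nlohmann::json data = nlohmann::json::parse(R"({"eos_token_id": [2, 32000, 32007]})");

    std::set<int64_t> stop_token_ids;
    // Reads the JSON array into the set; does nothing if the key
    // is missing or is not an array.
    ov::genai::utils::read_json_param(data, "eos_token_id", stop_token_ids);

    for (int64_t id : stop_token_ids)
        std::cout << id << ' ';  // 2 32000 32007
}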
2 changes: 2 additions & 0 deletions src/python/openvino_genai/py_openvino_genai.pyi
@@ -609,6 +609,8 @@ class GenerationConfig:
...
def is_greedy_decoding(self) -> bool:
...
def is_multinomial(self) -> bool:
...
def is_prompt_lookup(self) -> bool:
...
def set_eos_token_id(self, tokenizer_eos_token_id: int) -> None:
1 change: 1 addition & 0 deletions src/python/py_generation_config.cpp
@@ -118,6 +118,7 @@ void init_generation_config(py::module_& m) {
.def("set_eos_token_id", &GenerationConfig::set_eos_token_id, py::arg("tokenizer_eos_token_id"))
.def("is_beam_search", &GenerationConfig::is_beam_search)
.def("is_greedy_decoding", &GenerationConfig::is_greedy_decoding)
.def("is_multinomial", &GenerationConfig::is_multinomial)
.def("is_assisting_generation", &GenerationConfig::is_assisting_generation)
.def("is_prompt_lookup", &GenerationConfig::is_prompt_lookup)
.def("validate", &GenerationConfig::validate)
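The new is_multinomial binding simply exposes the existing C++ helper. A rough sketch of what it reports, assuming (as the validation code above suggests) that is_multinomial() mirrors do_sample:

#include "openvino/genai/generation_config.hpp"
#include <iostream>

int main() {
    ov::genai::GenerationConfig config;
    config.do_sample = true;      // enables multinomial sampling
    config.temperature = 0.7f;
    config.top_k = 50;
    config.top_p = 0.9f;

    std::cout << std::boolalpha
              << config.is_multinomial() << ' '       // true
              << config.is_beam_search() << ' '       // false: num_beams == 1
              << config.is_greedy_decoding() << '\n'; // false
}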
4 changes: 2 additions & 2 deletions tests/python_tests/common.py
@@ -299,7 +299,7 @@ def convert_to_hf(
kwargs['pad_token_id'] = default_generation_config.pad_token_id
kwargs['repetition_penalty'] = generation_config.repetition_penalty

if generation_config.num_beams > 1:
if generation_config.is_beam_search():
# beam search case
kwargs['num_beam_groups'] = generation_config.num_beam_groups
kwargs['num_beams'] = generation_config.num_beams
@@ -309,7 +309,7 @@
kwargs['output_scores'] = True
if generation_config.num_beam_groups > 1:
kwargs['diversity_penalty'] = generation_config.diversity_penalty
elif generation_config.do_sample:
elif generation_config.is_multinomial():
# multinomial
kwargs['temperature'] = generation_config.temperature
kwargs['top_k'] = generation_config.top_k
38 changes: 38 additions & 0 deletions tests/python_tests/test_generation_config.py
@@ -2,6 +2,9 @@
# SPDX-License-Identifier: Apache-2.0

from openvino_genai import GenerationConfig
from typing import Tuple, List
import json
import os
import pytest

configs = [
@@ -94,3 +97,38 @@ def test_invalid_generation_configs_throws(generation_config):
config = GenerationConfig()
with pytest.raises(RuntimeError):
config.update_generation_config(**generation_config)


def load_genai_generation_config_from_file(configs: List[Tuple], temp_path):
for json_file in temp_path.glob("*.json"):
json_file.unlink()

for config_json, config_name in configs:
with (temp_path / config_name).open('w') as f:
json.dump(config_json, f)

ov_generation_config = GenerationConfig(temp_path / "generation_config.json")

for _, config_name in configs:
os.remove(temp_path / config_name)

return ov_generation_config

@pytest.mark.precommit
@pytest.mark.nightly
def test_multiple_eos_are_read_as_stop_token_ids(tmp_path):
generation_config_json = {
"eos_token_id": [
2,
32000,
32007
]
}
configs = [
(generation_config_json, "generation_config.json"),
]

generation_config = load_genai_generation_config_from_file(configs, tmp_path)

assert generation_config.eos_token_id == 2
assert generation_config.stop_token_ids == { 2, 32000, 32007 }
13 changes: 9 additions & 4 deletions tests/python_tests/test_tokenizer.py
@@ -1,6 +1,7 @@
# Copyright (C) 2023-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os
import pytest
import numpy as np
from transformers import AutoTokenizer
@@ -17,15 +18,19 @@


def load_genai_tokenizer_with_configs(configs: List[Tuple], temp_path):
# load Tokenizer where all configs are cleared.
# remove existing jsons from previous tests
for json_file in temp_path.glob("*.json"):
json_file.unlink()

for config_json, config_name in configs:
with (temp_path / config_name).open('w') as f:
json.dump(config_json, f)
return openvino_genai.Tokenizer(temp_path)

ov_tokenizer = openvino_genai.Tokenizer(temp_path)

for _, config_name in configs:
os.remove(temp_path / config_name)

return ov_tokenizer


def get_chat_templates():
@@ -181,7 +186,7 @@ def test_apply_chat_template(model_tmp_path, chat_config: Tuple[str, Dict]):
@pytest.mark.nightly
def test_set_chat_template():
model_descr = get_chat_models_list()[0]
model_id, path, hf_tokenizer, model_opt, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))
model_id, path, hf_tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))

prompt = "how are you?"
dummy_conversation = [
@@ -123,11 +123,6 @@ Dataset filtered_dataset(const std::string& models_path, const std::string& data
ov::genai::GenerationConfig greedy_search = ov::genai::greedy();
greedy_search.max_new_tokens = std::min(max_output_len, output_len);
greedy_search.ignore_eos = true;
greedy_search.repetition_penalty = 1.0;
greedy_search.frequency_penalty = 0.0;
greedy_search.presence_penalty = 0.0;
greedy_search.diversity_penalty = 0.0;
greedy_search.length_penalty = 0.0;

dataset.push_data(human_question, greedy_search);
dataset.push_lens(input_len, output_len);