Refactored tests
ilya-lavrenov committed Jan 2, 2025
1 parent 4fcfa1d commit d516553
Showing 4 changed files with 127 additions and 390 deletions.
142 changes: 21 additions & 121 deletions tests/python_tests/common.py
@@ -20,20 +20,6 @@ def get_greedy() -> GenerationConfig:
generation_config.max_new_tokens = 30
return generation_config

def get_greedy_with_min_and_max_tokens() -> GenerationConfig:
generation_config = GenerationConfig()
generation_config.num_return_sequences = 1
generation_config.min_new_tokens = 15
generation_config.max_new_tokens = 30
return generation_config

def get_greedy_with_repetition_penalty() -> GenerationConfig:
generation_config = GenerationConfig()
generation_config.num_return_sequences = 1
generation_config.repetition_penalty = 2.0
generation_config.max_new_tokens = 30
return generation_config

def get_greedy_with_penalties() -> GenerationConfig:
generation_config = GenerationConfig()
generation_config.num_return_sequences = 1
@@ -42,33 +28,6 @@ def get_greedy_with_penalties() -> GenerationConfig:
generation_config.max_new_tokens = 30
return generation_config

def get_greedy_with_single_stop_string() -> GenerationConfig:
generation_config = GenerationConfig()
generation_config.num_return_sequences = 1
generation_config.min_new_tokens = 15
generation_config.max_new_tokens = 50
generation_config.stop_strings = {"anag"} # expected match on "manage"
generation_config.include_stop_str_in_output = True
return generation_config

def get_greedy_with_multiple_stop_strings() -> GenerationConfig:
generation_config = GenerationConfig()
generation_config.num_return_sequences = 1
generation_config.min_new_tokens = 1
generation_config.max_new_tokens = 50
generation_config.stop_strings = {".", "software", "Intel"}
generation_config.include_stop_str_in_output = True
return generation_config

def get_greedy_with_multiple_stop_strings_no_match() -> GenerationConfig:
generation_config = GenerationConfig()
generation_config.num_return_sequences = 1
generation_config.min_new_tokens = 1
generation_config.max_new_tokens = 50
generation_config.stop_strings = {"Einstein", "sunny", "geothermal"}
generation_config.include_stop_str_in_output = True
return generation_config

def get_beam_search() -> GenerationConfig:
generation_config = GenerationConfig()
generation_config.num_beam_groups = 3
@@ -79,78 +38,6 @@ def get_beam_search() -> GenerationConfig:
generation_config.num_return_sequences = generation_config.num_beams
return generation_config

def get_beam_search_min_and_max_tokens() -> GenerationConfig:
generation_config = GenerationConfig()
generation_config.num_beam_groups = 3
generation_config.num_beams = 6
generation_config.diversity_penalty = 1
generation_config.min_new_tokens = 15
generation_config.max_new_tokens = 30
generation_config.num_return_sequences = 3
generation_config.num_return_sequences = generation_config.num_beams
return generation_config

def get_beam_search_with_single_stop_string() -> GenerationConfig:
generation_config = GenerationConfig()
generation_config.num_beam_groups = 3
generation_config.num_beams = 6
generation_config.diversity_penalty = 1
generation_config.max_new_tokens = 50
generation_config.num_return_sequences = generation_config.num_beams
generation_config.stop_strings = {"open sour"} # expected match on "open source"
generation_config.include_stop_str_in_output = True
return generation_config

def get_beam_search_with_multiple_stop_strings() -> GenerationConfig:
generation_config = GenerationConfig()
generation_config.num_beam_groups = 3
generation_config.num_beams = 6
generation_config.diversity_penalty = 1
generation_config.max_new_tokens = 50
generation_config.num_return_sequences = generation_config.num_beams
generation_config.stop_strings = {".", "software", "Intel"}
generation_config.include_stop_str_in_output = True
return generation_config

def get_beam_search_with_multiple_stop_strings_no_match() -> GenerationConfig:
generation_config = GenerationConfig()
generation_config.num_beam_groups = 3
generation_config.num_beams = 6
generation_config.diversity_penalty = 1
generation_config.max_new_tokens = 30
generation_config.num_return_sequences = generation_config.num_beams
generation_config.stop_strings = {"Einstein", "sunny", "geothermal"}
generation_config.include_stop_str_in_output = True
return generation_config

def get_greedy_stop_strings_exclude_from_output() -> GenerationConfig:
generation_config = GenerationConfig()
generation_config.max_new_tokens = 30
generation_config.stop_strings = { "machines" }
generation_config.include_stop_str_in_output = False
return generation_config

def get_greedy_stop_strings_include_to_output() -> GenerationConfig:
generation_config = GenerationConfig()
generation_config.max_new_tokens = 30
generation_config.stop_strings = { "machines" }
generation_config.include_stop_str_in_output = True
return generation_config

def get_greedy_n_stop_strings_exclude_from_output() -> GenerationConfig:
generation_config = GenerationConfig()
generation_config.max_new_tokens = 30
generation_config.stop_strings = { "machines", "manage" }
generation_config.include_stop_str_in_output = False
return generation_config

def get_greedy_n_stop_strings_include_to_output() -> GenerationConfig:
generation_config = GenerationConfig()
generation_config.max_new_tokens = 30
generation_config.stop_strings = { "machines", "manage" }
generation_config.include_stop_str_in_output = True
return generation_config

def get_multinomial_temperature() -> GenerationConfig:
generation_config = GenerationConfig()
generation_config.do_sample = True
@@ -300,9 +187,15 @@ def convert_to_hf(

# copy default parameters
kwargs['bos_token_id'] = default_generation_config.bos_token_id
kwargs['eos_token_id'] = default_generation_config.eos_token_id
kwargs['pad_token_id'] = default_generation_config.pad_token_id

if len(generation_config.stop_token_ids) > 0:
kwargs['eos_token_id'] = list(generation_config.stop_token_ids)
elif generation_config.eos_token_id != -1:
kwargs['eos_token_id'] = generation_config.eos_token_id
else:
kwargs['eos_token_id'] = default_generation_config.eos_token_id

# copy penalties
kwargs['repetition_penalty'] = generation_config.repetition_penalty

@@ -314,8 +207,12 @@ def convert_to_hf(
kwargs['no_repeat_ngram_size'] = generation_config.no_repeat_ngram_size
kwargs['num_return_sequences'] = generation_config.num_return_sequences
kwargs['output_scores'] = True

if generation_config.num_beam_groups > 1:
kwargs['diversity_penalty'] = generation_config.diversity_penalty

from ov_genai_test_utils import STOP_CRITERIA_MAP
kwargs['early_stopping'] = STOP_CRITERIA_MAP[generation_config.stop_criteria]
elif generation_config.is_multinomial():
# mulitinomial
kwargs['temperature'] = generation_config.temperature
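
The first convert_to_hf hunk above changes how the eos_token_id passed to Hugging Face is chosen: explicit stop_token_ids take precedence, then a user-set eos_token_id (anything other than -1), and only then the model's default. Below is a minimal, self-contained sketch of that precedence; FakeGenAIConfig and FakeDefaultConfig are made-up stand-ins for the real openvino_genai and Hugging Face config objects and only illustrate the selection order.

from dataclasses import dataclass, field
from typing import List, Set, Union

@dataclass
class FakeGenAIConfig:                 # stand-in for openvino_genai.GenerationConfig
    stop_token_ids: Set[int] = field(default_factory=set)
    eos_token_id: int = -1             # -1 means "not set by the user"

@dataclass
class FakeDefaultConfig:               # stand-in for the model's default generation config
    eos_token_id: int = 2

def pick_eos_token_id(cfg: FakeGenAIConfig, default: FakeDefaultConfig) -> Union[List[int], int]:
    # Mirrors the precedence introduced in convert_to_hf: stop_token_ids first,
    # then an explicitly set eos_token_id, then the model default.
    if len(cfg.stop_token_ids) > 0:
        return list(cfg.stop_token_ids)
    elif cfg.eos_token_id != -1:
        return cfg.eos_token_id
    else:
        return default.eos_token_id

print(pick_eos_token_id(FakeGenAIConfig(stop_token_ids={7, 9}), FakeDefaultConfig()))  # a list, e.g. [9, 7]
print(pick_eos_token_id(FakeGenAIConfig(eos_token_id=11), FakeDefaultConfig()))        # 11
print(pick_eos_token_id(FakeGenAIConfig(), FakeDefaultConfig()))                       # 2 (model default)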
@@ -338,9 +235,10 @@ def run_hugging_face(
) -> List[GenerationResult]:
hf_generation_config = convert_to_hf(opt_model.generation_config, generation_config)
generation_results = []

for prompt in prompts:
inputs = hf_tokenizer(prompt, return_tensors="pt")
prompt_len = inputs['input_ids'].numel()
prompt_len = 0 if generation_config.echo else inputs['input_ids'].numel()
generate_outputs = opt_model.generate(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'],
generation_config=hf_generation_config, return_dict_in_generate=True, tokenizer=hf_tokenizer)
all_text_batch = hf_tokenizer.batch_decode([generated_ids[prompt_len:] for generated_ids in generate_outputs.sequences], skip_special_tokens=True)
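
The run_hugging_face change above keeps the prompt tokens in the decoded text when generation_config.echo is set, and strips them otherwise, by choosing prompt_len accordingly. The following stand-alone sketch shows just that slicing with made-up token ids; it does not use the real tokenizer or model.

from typing import List

def strip_prompt(generated_ids: List[int], prompt_len: int, echo: bool) -> List[int]:
    # With echo the whole sequence (prompt + completion) is kept;
    # otherwise decoding starts right after the prompt.
    start = 0 if echo else prompt_len
    return generated_ids[start:]

prompt_ids = [101, 2023, 2003]          # hypothetical prompt token ids
completion_ids = [7592, 2088, 102]      # hypothetical generated token ids
full_sequence = prompt_ids + completion_ids

print(strip_prompt(full_sequence, len(prompt_ids), echo=False))  # [7592, 2088, 102]
print(strip_prompt(full_sequence, len(prompt_ids), echo=True))   # prompt + completion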
@@ -379,9 +277,11 @@ def run_continuous_batching(
def run_llm_pipeline(
models_path : Path,
prompts: List[str],
generation_config : GenerationConfig
generation_config : GenerationConfig,
use_cb : bool = False
) -> List[GenerationResult]:
ov_pipe = LLMPipeline(models_path, device='CPU')
properties = { 'scheduler_config' : SchedulerConfig() } if use_cb else { }
ov_pipe = LLMPipeline(models_path, device='CPU', **properties)

generation_results = []
for prompt in prompts:
@@ -400,7 +300,7 @@ def run_llm_pipeline(
return generation_results


def compare_generation_result(hf_result: GenerationResult, ov_result: GenerationResult, generation_config: GenerationConfig ):
def compare_generation_result(hf_result: GenerationResult, ov_result: GenerationResult, generation_config: GenerationConfig):
if generation_config.is_beam_search():
assert len(hf_result.m_scores) == len(ov_result.m_scores)
for hf_score, ov_score in zip(hf_result.m_scores, ov_result.m_scores):
@@ -429,7 +329,7 @@ def compare_generation_results(prompts: List[str], hf_results: List[GenerationRe
compare_generation_result(ref_result, ov_result, generation_config)


def get_hugging_face_models(model_id: str, use_optimum = True):
def get_hugging_face_models(model_id: str, use_optimum = True):
hf_tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True) if use_optimum else \
AutoModelForCausalLM.from_pretrained(model_id)
@@ -448,7 +348,7 @@ def convert_models(opt_model : OVModelForCausalLM, hf_tokenizer : AutoTokenizer,
serialize(detokenizer, models_path / "openvino_detokenizer.xml")


def run_llm_pipeline_with_ref(model_id: str, prompts: List[str], generation_config: GenerationConfig | dict, tmp_path: Path):
def run_llm_pipeline_with_ref(model_id: str, prompts: List[str], generation_config: GenerationConfig | dict, tmp_path: Path, use_cb : bool = False):
use_optimum = True
models_path : Path = tmp_path / model_id
opt_model, hf_tokenizer = get_hugging_face_models(model_id, use_optimum)
@@ -459,7 +359,7 @@ def run_llm_pipeline_with_ref(model_id: str, prompts: List[str], generation_conf
if use_optimum:
convert_models(opt_model, hf_tokenizer, models_path)

ov_results = run_llm_pipeline(models_path, prompts, generation_config)
ov_results = run_llm_pipeline(models_path, prompts, generation_config, use_cb)
hf_results = run_hugging_face(opt_model, hf_tokenizer, prompts, generation_config)

compare_generation_results(prompts, hf_results, ov_results, generation_config)
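
run_llm_pipeline and run_llm_pipeline_with_ref now take a use_cb flag that switches LLMPipeline to continuous batching by passing a scheduler_config property. The snippet below is a self-contained sketch of that keyword-forwarding pattern; FakePipeline and FakeSchedulerConfig are placeholders for the real openvino_genai classes, and only the call shape is being demonstrated.

class FakeSchedulerConfig:
    """Placeholder for openvino_genai.SchedulerConfig."""
    pass

class FakePipeline:
    """Placeholder for openvino_genai.LLMPipeline; records how it was built."""
    def __init__(self, models_path: str, device: str, **properties):
        self.models_path = models_path
        self.device = device
        self.uses_continuous_batching = 'scheduler_config' in properties

def build_pipeline(models_path: str, use_cb: bool = False) -> FakePipeline:
    # Same pattern as run_llm_pipeline: an empty dict leaves the defaults,
    # a scheduler_config entry opts in to continuous batching.
    properties = {'scheduler_config': FakeSchedulerConfig()} if use_cb else {}
    return FakePipeline(models_path, device='CPU', **properties)

print(build_pipeline('/tmp/model').uses_continuous_batching)               # False
print(build_pipeline('/tmp/model', use_cb=True).uses_continuous_batching)  # True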
Diffs for the remaining 3 changed files are collapsed and not shown here.