Skip to content

Commit

Permalink
Fixed some tests run locally (openvinotoolkit#1187)
Browse files Browse the repository at this point in the history
Always tokenize the prompt as a batch so that the tokenizer returns an `attention_mask`,
which is then passed to `generate` to prevent the following warning:
```
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
```
  • Loading branch information
ilya-lavrenov committed Nov 20, 2024
1 parent 5889c2a commit 189ec37
Showing 1 changed file with 4 additions and 9 deletions.
13 changes: 4 additions & 9 deletions tests/python_tests/test_generate_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,10 @@ def run_hf_ov_genai_comparison(model_descr, generation_config: Dict, prompt: str
generation_config_hf['early_stopping'] = STOP_CRITERIA_MAP[generation_config_hf.pop('stop_criteria')]
generation_config_hf.pop('ignore_eos', None)

encoded_prompt = tokenizer.encode(prompt, return_tensors='pt', add_special_tokens=True)
hf_encoded_output = model.generate(encoded_prompt, **generation_config_hf)
hf_output = tokenizer.decode(hf_encoded_output[0, encoded_prompt.shape[1]:], skip_special_tokens=True)
encoded_prompt = tokenizer([prompt], return_tensors='pt', add_special_tokens=True)
prompt_ids, attention_mask = encoded_prompt['input_ids'], encoded_prompt['attention_mask']
hf_encoded_output = model.generate(prompt_ids, attention_mask=attention_mask, **generation_config_hf)
hf_output = tokenizer.decode(hf_encoded_output[0, prompt_ids.shape[1]:], skip_special_tokens=True)

ov_output = pipe.generate(prompt, **config)
if config.get('num_return_sequences', 1) > 1:
Expand Down Expand Up @@ -179,12 +180,6 @@ def test_ov_tensors(model_descr, inputs):
@pytest.mark.parametrize("prompt", prompts)
@pytest.mark.precommit
@pytest.mark.nightly
@pytest.mark.xfail(
raises=TypeError,
reason="pybind was unable to find ov::Tensor from openvino yet",
strict=False,
condition=sys.platform in ["linux", "win32"]
)
def test_genai_tokenizer_encode(model_descr, prompt):
model_id, path, tokenizer, model, pipe = read_model(model_descr)
tok = pipe.get_tokenizer()
Expand Down

0 comments on commit 189ec37

Please sign in to comment.