From c2a8f979cde9096c0cfa9ef2ad298e9ef3c0295a Mon Sep 17 00:00:00 2001
From: Anastasiia Pnevskaia
Date: Thu, 5 Dec 2024 22:05:30 +0100
Subject: [PATCH] Fixed number of input tokens for VLM. (#1317)

Fixed number of input tokens for VLM.
---
 src/cpp/src/visual_language/pipeline.cpp |  1 +
 tests/python_tests/test_vlm_api.py       | 25 ++++++++++++------------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp
index 8059c60896..17cf30e90f 100644
--- a/src/cpp/src/visual_language/pipeline.cpp
+++ b/src/cpp/src/visual_language/pipeline.cpp
@@ -165,6 +165,7 @@ class ov::genai::VLMPipeline::VLMPipelineImpl {
 
         // Common perf metrics
         auto& res_raw_counters = decoded.perf_metrics.raw_metrics;
+        decoded.perf_metrics.num_input_tokens = prompt_ids.get_size();
         decoded.perf_metrics.load_time = m_load_time_ms;
         res_raw_counters.generate_durations.emplace_back(PerfMetrics::get_microsec(generate_end_time - generate_start_time));
         res_raw_counters.detokenization_durations.emplace_back(PerfMetrics::get_microsec(decode_end_time - decode_start_time));
diff --git a/tests/python_tests/test_vlm_api.py b/tests/python_tests/test_vlm_api.py
index f0482300b4..b4df6492bb 100644
--- a/tests/python_tests/test_vlm_api.py
+++ b/tests/python_tests/test_vlm_api.py
@@ -101,19 +101,18 @@ def test_perf_metrics(cache):
 
     assert perf_metrics is not None
 
-    assert perf_metrics.get_load_time() > 0
-    assert perf_metrics.get_num_generated_tokens() > 0
-    assert perf_metrics.get_num_input_tokens() > 0
-    assert perf_metrics.get_ttft().mean > 0
-    assert perf_metrics.get_tpot().mean > 0
-    assert perf_metrics.get_ipot().mean > 0
-    assert perf_metrics.get_throughput().mean > 0
-    assert perf_metrics.get_inference_duration().mean > 0
-    assert perf_metrics.get_generate_duration().mean > 0
-    assert perf_metrics.get_tokenization_duration().mean > 0
-    assert perf_metrics.get_detokenization_duration().mean > 0
-    assert perf_metrics.get_detokenization_duration().mean > 0
-    assert perf_metrics.get_prepare_embeddings_duration().mean > 0
+    assert 0 < perf_metrics.get_load_time() < 2000
+    assert 0 < perf_metrics.get_num_generated_tokens() < 100
+    assert 0 < perf_metrics.get_num_input_tokens() < 100
+    assert 0 < perf_metrics.get_ttft().mean < 1000
+    assert 0 < perf_metrics.get_tpot().mean < 100
+    assert 0 < perf_metrics.get_ipot().mean < 100
+    assert 0 < perf_metrics.get_throughput().mean < 1000
+    assert 0 < perf_metrics.get_inference_duration().mean < 1000
+    assert 0 < perf_metrics.get_generate_duration().mean < 1000
+    assert 0 < perf_metrics.get_tokenization_duration().mean < 100
+    assert 0 < perf_metrics.get_detokenization_duration().mean < 10
+    assert 0 < perf_metrics.get_prepare_embeddings_duration().mean < 100
 
     # assert that calculating statistics manually from the raw counters we get the same results as from PerfMetrics
     vlm_raw_metrics = perf_metrics.vlm_raw_metrics
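
For context, a minimal sketch of how the corrected metric surfaces through the openvino_genai Python API, mirroring the calls used in test_vlm_api.py. The model directory, image file, and generation length below are placeholder assumptions, not values taken from this patch.

import numpy as np
import openvino as ov
import openvino_genai
from PIL import Image

# Placeholder inputs: any VLM exported for openvino_genai and a local RGB image.
model_dir = "MiniCPM-V-2_6"
image = Image.open("cat.png").convert("RGB")

# Pack the image as a uint8 NHWC tensor, as the VLM samples and tests do.
image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8)
image_tensor = ov.Tensor(image_data)

pipe = openvino_genai.VLMPipeline(model_dir, "CPU")
result = pipe.generate("Describe the image.", images=[image_tensor], max_new_tokens=30)

# Before this patch the VLM pipeline did not populate num_input_tokens;
# with the fix it reports the size of the tokenized prompt (prompt_ids).
print(result.perf_metrics.get_num_input_tokens())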