diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 5fc4617f2c..2e0afaa882 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -16,10 +16,11 @@ concurrency:
cancel-in-progress: true
env:
- l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17800-91ae987c516/l_openvino_toolkit_ubuntu20_2025.0.0.dev20250109_x86_64.tgz
- l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17800-91ae987c516/l_openvino_toolkit_ubuntu22_2025.0.0.dev20250109_x86_64.tgz
- m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17800-91ae987c516/m_openvino_toolkit_macos_12_6_2025.0.0.dev20250109_x86_64.tgz
- w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17800-91ae987c516/w_openvino_toolkit_windows_2025.0.0.dev20250109_x86_64.zip
+ l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-17911-83c047443de/l_openvino_toolkit_ubuntu20_2025.1.0.dev20250116_x86_64.tgz
+ l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-17911-83c047443de/l_openvino_toolkit_ubuntu22_2025.1.0.dev20250116_x86_64.tgz
+ m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-17911-83c047443de/m_openvino_toolkit_macos_12_6_2025.1.0.dev20250116_x86_64.tgz
+ w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-17911-83c047443de/w_openvino_toolkit_windows_2025.1.0.dev20250116_x86_64.zip
+
jobs:
cpp-multinomial-greedy_causal_lm-ubuntu:
runs-on: ubuntu-20.04-8-cores
@@ -463,6 +464,7 @@ jobs:
env:
PYTHONPATH: "./build/:$PYTHONPATH"
LD_LIBRARY_PATH: "./build/openvino_genai/:$LD_LIBRARY_PATH"
+
cpp-prompt_lookup_decoding_lm-ubuntu:
runs-on: ubuntu-20.04-16-cores
defaults:
@@ -520,6 +522,7 @@ jobs:
env:
PYTHONPATH: "./build/:$PYTHONPATH"
LD_LIBRARY_PATH: "./build/openvino_genai/:$LD_LIBRARY_PATH"
+
cpp-Phi-1_5:
runs-on: ubuntu-20.04-16-cores
defaults:
@@ -697,7 +700,6 @@ jobs:
diff pred2.txt ref.txt
echo "Chat sample python" passed
-
visual_language_chat_sample-ubuntu-minicpm_v2_6:
runs-on: ubuntu-22.04-16-cores
steps:
@@ -836,6 +838,36 @@ jobs:
<<< $'Who drew this painting?\nWhen did the painter live?'
timeout-minutes: 4
+ visual_language_chat_sample-ubuntu-qwen2vl:
+ runs-on: ubuntu-22.04-16-cores
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ submodules: recursive
+ - uses: actions/setup-python@v4
+ with:
+ python-version: 3.11
+ - uses: ./.github/actions/install_openvino
+ with:
+ ov_link: ${{ env.l_u22_ov_link }}
+ - uses: ./.github/actions/build_app
+ with:
+ build_target: 'visual_language_chat py_openvino_genai'
+ - uses: ./.github/actions/install_python_deps
+ - name: Download and convert Qwen2VL model
+ run: |
+ source ./ov/setupvars.sh
+ optimum-cli export openvino --model Qwen/Qwen2-VL-2B-Instruct ./qwen2_vl_2b_ov/ --trust-remote-code
+ - name: Download images
+ run: |
+ wget https://llava-vl.github.io/static/images/monalisa.jpg
+ - name: Run visual_language_chat C++ sample - Qwen2VL
+ run: >
+ source ./ov/setupvars.sh
+ && ./build/samples/cpp/visual_language_chat/visual_language_chat ./qwen2_vl_2b_ov/ monalisa.jpg
+ <<< $'Who drew this painting?\nWhen did the painter live?'
+ timeout-minutes: 4
+
cpp-continuous-batching-ubuntu:
runs-on: ubuntu-20.04-8-cores
defaults:
diff --git a/.github/workflows/job_vlm_sample_llava.yml b/.github/workflows/job_vlm_sample_llava.yml
index a8a7a19a5b..1fb9cdee98 100644
--- a/.github/workflows/job_vlm_sample_llava.yml
+++ b/.github/workflows/job_vlm_sample_llava.yml
@@ -11,7 +11,7 @@ on:
type: string
env:
- l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17800-91ae987c516/l_openvino_toolkit_ubuntu22_2025.0.0.dev20250109_x86_64.tgz
+ l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-17911-83c047443de/l_openvino_toolkit_ubuntu22_2025.1.0.dev20250116_x86_64.tgz
jobs:
visual_language_chat_sample-ubuntu-llava:
diff --git a/.github/workflows/lcm_dreamshaper_cpp.yml b/.github/workflows/lcm_dreamshaper_cpp.yml
index 6129aec624..55e4dc54ad 100644
--- a/.github/workflows/lcm_dreamshaper_cpp.yml
+++ b/.github/workflows/lcm_dreamshaper_cpp.yml
@@ -18,8 +18,8 @@ concurrency:
env:
PYTHON_VERSION: '3.9'
- LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17800-91ae987c516/l_openvino_toolkit_ubuntu22_2025.0.0.dev20250109_x86_64.tgz
- WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17800-91ae987c516/w_openvino_toolkit_windows_2025.0.0.dev20250109_x86_64.zip
+ LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-17911-83c047443de/l_openvino_toolkit_ubuntu22_2025.1.0.dev20250116_x86_64.tgz
+ WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-17911-83c047443de/w_openvino_toolkit_windows_2025.1.0.dev20250116_x86_64.zip
OV_INSTALL_DIR: ${{ github.workspace }}/ov
jobs:
@@ -93,7 +93,7 @@ jobs:
${{ env.build_dir }}/samples/cpp/image_generation/inpainting ./models/lcm_dreamshaper_v7 "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting" ./image.png ./mask_image.png
lcm_dreamshaper_v7_cpp-windows:
- runs-on: windows-2019
+ runs-on: windows-2022
defaults:
run:
shell: pwsh
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3a67a24bab..bb19676da3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2024 Intel Corporation
+# Copyright (C) 2018-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
@@ -30,7 +30,7 @@ if(UNIX AND NOT (APPLE OR ANDROID OR CYGWIN))
endif()
project(OpenVINOGenAI
- VERSION 2025.0.0.0
+ VERSION 2025.1.0.0
DESCRIPTION "OpenVINO GenAI"
HOMEPAGE_URL "https://github.com/openvinotoolkit/openvino.genai"
LANGUAGES CXX C)
diff --git a/SUPPORTED_MODELS.md b/SUPPORTED_MODELS.md
index 9487c715d9..f79234489d 100644
--- a/SUPPORTED_MODELS.md
+++ b/SUPPORTED_MODELS.md
@@ -362,6 +362,17 @@ In addition to image generation models, `InpaintingPipeline` supports specialize
+    <tr>
+      <td><code>Qwen2-VL</code></td>
+      <td>Qwen2-VL</td>
+      <td>Not supported</td>
+      <td>
+        <ul>
+          <li><a href="https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct"><code>Qwen/Qwen2-VL-2B-Instruct</code></a></li>
+          <li><a href="https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct"><code>Qwen/Qwen2-VL-7B-Instruct</code></a></li>
+        </ul>
+      </td>
+    </tr>
diff --git a/cmake/features.cmake b/cmake/features.cmake
index 0434b21ee9..8b2e05472b 100644
--- a/cmake/features.cmake
+++ b/cmake/features.cmake
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2024 Intel Corporation
+# Copyright (C) 2018-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
diff --git a/cmake/version.cmake b/cmake/version.cmake
index b9b51e8fe2..38c54a6b1c 100644
--- a/cmake/version.cmake
+++ b/cmake/version.cmake
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2024 Intel Corporation
+# Copyright (C) 2018-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
diff --git a/pyproject.toml b/pyproject.toml
index 27318d42ed..722a012f34 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "openvino-genai"
-version = "2025.0.0.0"
+version = "2025.1.0.0"
description = "Library of the most popular Generative AI model pipelines, optimized execution methods, and samples"
requires-python = ">=3.9"
readme = { file = "src/README.md", content-type="text/markdown" }
@@ -30,7 +30,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: CPython"
]
dependencies = [
- "openvino_tokenizers~=2025.0.0.0.dev"
+ "openvino_tokenizers~=2025.1.0.0.dev"
]
[tool.py-build-cmake.module]
@@ -52,7 +52,7 @@ options = {"BUILD_TOKENIZERS" = "OFF"}
[build-system]
requires = [
"py-build-cmake==0.3.4",
- "openvino~=2025.0.0.0.dev",
+ "openvino~=2025.1.0.0.dev",
"pybind11-stubgen==2.5.1",
"cmake~=3.23.0"
]
diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt
index d32eb832a6..d84423b1e8 100644
--- a/samples/CMakeLists.txt
+++ b/samples/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2024 Intel Corporation
+# Copyright (C) 2018-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
diff --git a/samples/cpp/image_generation/CMakeLists.txt b/samples/cpp/image_generation/CMakeLists.txt
index f3e4860ce0..16710d2697 100644
--- a/samples/cpp/image_generation/CMakeLists.txt
+++ b/samples/cpp/image_generation/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
find_package(OpenVINOGenAI REQUIRED
diff --git a/samples/cpp/image_generation/README.md b/samples/cpp/image_generation/README.md
index 3dcb64b97c..4d2b2ef9ff 100644
--- a/samples/cpp/image_generation/README.md
+++ b/samples/cpp/image_generation/README.md
@@ -28,7 +28,7 @@ Users can change the sample code and play with the following generation paramete
The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version.
-It's not required to install [../../export-requirements.txt](../../export requirements.txt) for deployment if the model has already been exported.
+It's not required to install [../../export-requirements.txt](../../export-requirements.txt) for deployment if the model has already been exported.
```sh
pip install --upgrade-strategy eager -r ../../requirements.txt
diff --git a/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp b/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp
index 1bba41ffc5..2e37c88f8b 100644
--- a/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp
+++ b/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/image_generation/text2image_pipeline.hpp"
diff --git a/samples/cpp/image_generation/image2image.cpp b/samples/cpp/image_generation/image2image.cpp
index 2e1e5f57ba..284948f38e 100644
--- a/samples/cpp/image_generation/image2image.cpp
+++ b/samples/cpp/image_generation/image2image.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/image_generation/image2image_pipeline.hpp"
diff --git a/samples/cpp/image_generation/imwrite.cpp b/samples/cpp/image_generation/imwrite.cpp
index 373da7dfe8..18fba1ff34 100644
--- a/samples/cpp/image_generation/imwrite.cpp
+++ b/samples/cpp/image_generation/imwrite.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/samples/cpp/image_generation/imwrite.hpp b/samples/cpp/image_generation/imwrite.hpp
index 9da1b4af2f..464cbacd34 100644
--- a/samples/cpp/image_generation/imwrite.hpp
+++ b/samples/cpp/image_generation/imwrite.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/samples/cpp/image_generation/inpainting.cpp b/samples/cpp/image_generation/inpainting.cpp
index a446035e0f..2e866ca345 100644
--- a/samples/cpp/image_generation/inpainting.cpp
+++ b/samples/cpp/image_generation/inpainting.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/image_generation/inpainting_pipeline.hpp"
diff --git a/samples/cpp/image_generation/load_image.cpp b/samples/cpp/image_generation/load_image.cpp
index 7f14e54b3e..69b69d9f15 100644
--- a/samples/cpp/image_generation/load_image.cpp
+++ b/samples/cpp/image_generation/load_image.cpp
@@ -1,5 +1,5 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/samples/cpp/image_generation/load_image.hpp b/samples/cpp/image_generation/load_image.hpp
index f66dd2caf2..a04749e1d0 100644
--- a/samples/cpp/image_generation/load_image.hpp
+++ b/samples/cpp/image_generation/load_image.hpp
@@ -1,5 +1,5 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/samples/cpp/image_generation/lora_text2image.cpp b/samples/cpp/image_generation/lora_text2image.cpp
index af042a2c89..d96df2ceb0 100644
--- a/samples/cpp/image_generation/lora_text2image.cpp
+++ b/samples/cpp/image_generation/lora_text2image.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/image_generation/text2image_pipeline.hpp"
diff --git a/samples/cpp/image_generation/text2image.cpp b/samples/cpp/image_generation/text2image.cpp
index 5668259f90..0ec0f924e2 100644
--- a/samples/cpp/image_generation/text2image.cpp
+++ b/samples/cpp/image_generation/text2image.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/image_generation/text2image_pipeline.hpp"
diff --git a/samples/cpp/text_generation/CMakeLists.txt b/samples/cpp/text_generation/CMakeLists.txt
index f798b4f5fc..efad21a647 100644
--- a/samples/cpp/text_generation/CMakeLists.txt
+++ b/samples/cpp/text_generation/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
find_package(OpenVINOGenAI REQUIRED
diff --git a/samples/cpp/text_generation/beam_search_causal_lm.cpp b/samples/cpp/text_generation/beam_search_causal_lm.cpp
index fc18fa8e0c..9e1ee069ad 100644
--- a/samples/cpp/text_generation/beam_search_causal_lm.cpp
+++ b/samples/cpp/text_generation/beam_search_causal_lm.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp
index 76f2cbef2c..4a8c8d0723 100644
--- a/samples/cpp/text_generation/benchmark_genai.cpp
+++ b/samples/cpp/text_generation/benchmark_genai.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/llm_pipeline.hpp"
diff --git a/samples/cpp/text_generation/chat_sample.cpp b/samples/cpp/text_generation/chat_sample.cpp
index 41d63fc0f1..c0d172563c 100644
--- a/samples/cpp/text_generation/chat_sample.cpp
+++ b/samples/cpp/text_generation/chat_sample.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/llm_pipeline.hpp"
diff --git a/samples/cpp/text_generation/encrypted_model_causal_lm.cpp b/samples/cpp/text_generation/encrypted_model_causal_lm.cpp
index 3ea94d605f..7926de5552 100644
--- a/samples/cpp/text_generation/encrypted_model_causal_lm.cpp
+++ b/samples/cpp/text_generation/encrypted_model_causal_lm.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/llm_pipeline.hpp"
diff --git a/samples/cpp/text_generation/greedy_causal_lm.cpp b/samples/cpp/text_generation/greedy_causal_lm.cpp
index b5ca59095b..ca5e193da1 100644
--- a/samples/cpp/text_generation/greedy_causal_lm.cpp
+++ b/samples/cpp/text_generation/greedy_causal_lm.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/llm_pipeline.hpp"
diff --git a/samples/cpp/text_generation/lora_greedy_causal_lm.cpp b/samples/cpp/text_generation/lora_greedy_causal_lm.cpp
index b854b95fd3..0a74a00599 100644
--- a/samples/cpp/text_generation/lora_greedy_causal_lm.cpp
+++ b/samples/cpp/text_generation/lora_greedy_causal_lm.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/llm_pipeline.hpp"
diff --git a/samples/cpp/text_generation/multinomial_causal_lm.cpp b/samples/cpp/text_generation/multinomial_causal_lm.cpp
index bb74deffb1..96744e6136 100644
--- a/samples/cpp/text_generation/multinomial_causal_lm.cpp
+++ b/samples/cpp/text_generation/multinomial_causal_lm.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/llm_pipeline.hpp"
diff --git a/samples/cpp/text_generation/prompt_lookup_decoding_lm.cpp b/samples/cpp/text_generation/prompt_lookup_decoding_lm.cpp
index 8b48dbade0..bf4d81daa2 100644
--- a/samples/cpp/text_generation/prompt_lookup_decoding_lm.cpp
+++ b/samples/cpp/text_generation/prompt_lookup_decoding_lm.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/samples/cpp/text_generation/speculative_decoding_lm.cpp b/samples/cpp/text_generation/speculative_decoding_lm.cpp
index e10228863f..8a34bbf8a9 100644
--- a/samples/cpp/text_generation/speculative_decoding_lm.cpp
+++ b/samples/cpp/text_generation/speculative_decoding_lm.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/samples/cpp/visual_language_chat/CMakeLists.txt b/samples/cpp/visual_language_chat/CMakeLists.txt
index 9d3dea68a3..6f209ad0c8 100644
--- a/samples/cpp/visual_language_chat/CMakeLists.txt
+++ b/samples/cpp/visual_language_chat/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
find_package(OpenVINOGenAI REQUIRED
diff --git a/samples/cpp/visual_language_chat/README.md b/samples/cpp/visual_language_chat/README.md
index 73baf0088a..0017bd0b7c 100644
--- a/samples/cpp/visual_language_chat/README.md
+++ b/samples/cpp/visual_language_chat/README.md
@@ -12,7 +12,7 @@ There are two sample files:
The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version.
-It's not required to install [../../export-requirements.txt](../../export requirements.txt) for deployment if the model has already been exported.
+It's not required to install [../../export-requirements.txt](../../export-requirements.txt) for deployment if the model has already been exported.
```sh
pip install --upgrade-strategy eager -r ../../requirements.txt
diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp
index 858a626fd5..8467738307 100644
--- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp
+++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/samples/cpp/visual_language_chat/load_image.cpp b/samples/cpp/visual_language_chat/load_image.cpp
index 6dd246d647..d4f0d5de28 100644
--- a/samples/cpp/visual_language_chat/load_image.cpp
+++ b/samples/cpp/visual_language_chat/load_image.cpp
@@ -1,5 +1,5 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/samples/cpp/visual_language_chat/load_image.hpp b/samples/cpp/visual_language_chat/load_image.hpp
index d0dcc271cd..5a72f8cdaf 100644
--- a/samples/cpp/visual_language_chat/load_image.hpp
+++ b/samples/cpp/visual_language_chat/load_image.hpp
@@ -1,5 +1,5 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/samples/cpp/whisper_speech_recognition/CMakeLists.txt b/samples/cpp/whisper_speech_recognition/CMakeLists.txt
index 39f017adb1..93b9e22941 100644
--- a/samples/cpp/whisper_speech_recognition/CMakeLists.txt
+++ b/samples/cpp/whisper_speech_recognition/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
find_package(OpenVINOGenAI REQUIRED
diff --git a/samples/cpp/whisper_speech_recognition/README.md b/samples/cpp/whisper_speech_recognition/README.md
index 2ea3322dee..47650b566c 100644
--- a/samples/cpp/whisper_speech_recognition/README.md
+++ b/samples/cpp/whisper_speech_recognition/README.md
@@ -6,7 +6,7 @@ This example showcases inference of speech recognition Whisper Models. The appli
The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version.
-It's not required to install [../../export-requirements.txt](../../export requirements.txt) for deployment if the model has already been exported.
+It's not required to install [../../export-requirements.txt](../../export-requirements.txt) for deployment if the model has already been exported.
```sh
pip install --upgrade-strategy eager -r ../../requirements.txt
diff --git a/samples/cpp/whisper_speech_recognition/audio_utils.cpp b/samples/cpp/whisper_speech_recognition/audio_utils.cpp
index a64db44f3f..b769ded671 100644
--- a/samples/cpp/whisper_speech_recognition/audio_utils.cpp
+++ b/samples/cpp/whisper_speech_recognition/audio_utils.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "audio_utils.hpp"
diff --git a/samples/cpp/whisper_speech_recognition/audio_utils.hpp b/samples/cpp/whisper_speech_recognition/audio_utils.hpp
index 6e4b141d83..b8b022db5a 100644
--- a/samples/cpp/whisper_speech_recognition/audio_utils.hpp
+++ b/samples/cpp/whisper_speech_recognition/audio_utils.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/samples/cpp/whisper_speech_recognition/whisper_speech_recognition.cpp b/samples/cpp/whisper_speech_recognition/whisper_speech_recognition.cpp
index 3df17a77f5..3b2b4ff466 100644
--- a/samples/cpp/whisper_speech_recognition/whisper_speech_recognition.cpp
+++ b/samples/cpp/whisper_speech_recognition/whisper_speech_recognition.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "audio_utils.hpp"
diff --git a/samples/deployment-requirements.txt b/samples/deployment-requirements.txt
index c6ad9eaaa8..312bd79799 100644
--- a/samples/deployment-requirements.txt
+++ b/samples/deployment-requirements.txt
@@ -1,5 +1,5 @@
--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
-openvino_genai~=2025.0.0.0.dev
+openvino_genai~=2025.1.0.0.dev
librosa==0.10.2.post1 # For Whisper
pillow==11.1.0 # Image processing for VLMs
diff --git a/samples/export-requirements.txt b/samples/export-requirements.txt
index a6aa3870f4..c35c2ca6de 100644
--- a/samples/export-requirements.txt
+++ b/samples/export-requirements.txt
@@ -1,12 +1,12 @@
--extra-index-url https://download.pytorch.org/whl/cpu
--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
-openvino-tokenizers~=2025.0.0.0.dev
+openvino-tokenizers~=2025.1.0.0.dev
optimum-intel @ git+https://github.com/huggingface/optimum-intel.git
numpy<2.0.0; sys_platform == 'darwin'
einops==0.8.0 # For Qwen
transformers_stream_generator==0.0.5 # For Qwen
-diffusers==0.32.1 # For image generation pipelines
+diffusers==0.32.2 # For image generation pipelines
timm==1.0.13 # For exporting InternVL2
torchvision # For visual language models
transformers>=4.43 # For Whisper
diff --git a/samples/python/text_generation/benchmark_genai.py b/samples/python/text_generation/benchmark_genai.py
index a5ed4acfc1..d279ab95fc 100755
--- a/samples/python/text_generation/benchmark_genai.py
+++ b/samples/python/text_generation/benchmark_genai.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
diff --git a/samples/python/text_generation/multinomial_causal_lm.py b/samples/python/text_generation/multinomial_causal_lm.py
index 5ec9d54601..c915b89a2f 100755
--- a/samples/python/text_generation/multinomial_causal_lm.py
+++ b/samples/python/text_generation/multinomial_causal_lm.py
@@ -31,6 +31,7 @@ def __init__(self, tokenizer):
self.tokens_cache = []
self.text_queue = queue.Queue()
self.print_len = 0
+ self.decoded_lengths = []
def __iter__(self):
"""
@@ -80,30 +81,35 @@ def put(self, token_id: int) -> bool:
Returns:
bool: True if generation should be stopped, False otherwise.
- """
+ """
self.tokens_cache.append(token_id)
text = self.tokenizer.decode(self.tokens_cache)
+ self.decoded_lengths.append(len(text))
word = ''
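+        # Hold the last few tokens back before printing: decoding further tokens
+        # can retroactively change how the tail of the cached sequence detokenizes.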
+ delay_n_tokens = 3
if len(text) > self.print_len and '\n' == text[-1]:
# Flush the cache after the new line symbol.
- word = text[self.print_len:]
+ word = text[self.print_len:]
self.tokens_cache = []
+ self.decoded_lengths = []
self.print_len = 0
- elif len(text) >= 3 and text[-1] == chr(65533):
+ elif len(text) > 0 and text[-1] == chr(65533):
# Don't print incomplete text.
- pass
- elif len(text) > self.print_len:
- # It is possible to have a shorter text after adding new token.
- # Print to output only if text length is increaesed.
- word = text[self.print_len:]
- self.print_len = len(text)
- self.put_word(word)
-
+ self.decoded_lengths[-1] = -1
+ elif len(self.tokens_cache) >= delay_n_tokens:
+ print_until = self.decoded_lengths[-delay_n_tokens]
+ if print_until != -1 and print_until > self.print_len:
+ # It is possible to have a shorter text after adding new token.
+ # Print to output only if text length is increased and text is complete (print_until != -1).
+ word = text[self.print_len:print_until]
+ self.print_len = print_until
+ self.put_word(word)
+
if self.get_stop_flag():
# When generation is stopped from streamer then end is not called, need to call it here manually.
self.end()
- return True # True means stop generation
+ return True # True means stop generation
else:
return False # False means continue generation
@@ -129,6 +135,7 @@ def __init__(self, tokenizer, tokens_len):
def put(self, token_id: int) -> bool:
if (len(self.tokens_cache) + 1) % self.tokens_len != 0:
self.tokens_cache.append(token_id)
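+            # -1 marks a position whose text was not decoded, the same sentinel
+            # the parent class uses for incomplete (replacement-character) text.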
+ self.decoded_lengths.append(-1)
return False
return super().put(token_id)
diff --git a/samples/python/visual_language_chat/benchmark_vlm.py b/samples/python/visual_language_chat/benchmark_vlm.py
index 4a8f50de73..4ca272fd14 100755
--- a/samples/python/visual_language_chat/benchmark_vlm.py
+++ b/samples/python/visual_language_chat/benchmark_vlm.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d9f3cc64db..2f615a1b6f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2024 Intel Corporation
+# Copyright (C) 2018-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt
index e954037daf..43bca747ec 100644
--- a/src/cpp/CMakeLists.txt
+++ b/src/cpp/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2024 Intel Corporation
+# Copyright (C) 2018-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
diff --git a/src/cpp/include/openvino/genai/cache_eviction.hpp b/src/cpp/include/openvino/genai/cache_eviction.hpp
index b8312361eb..8bab2fd744 100644
--- a/src/cpp/include/openvino/genai/cache_eviction.hpp
+++ b/src/cpp/include/openvino/genai/cache_eviction.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp
index ed9fc3a30d..6c5552a7b5 100644
--- a/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp
+++ b/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp
index 164ff29131..3a75fc02ea 100644
--- a/src/cpp/include/openvino/genai/generation_config.hpp
+++ b/src/cpp/include/openvino/genai/generation_config.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/generation_handle.hpp b/src/cpp/include/openvino/genai/generation_handle.hpp
index 953e573edd..6619e3e012 100644
--- a/src/cpp/include/openvino/genai/generation_handle.hpp
+++ b/src/cpp/include/openvino/genai/generation_handle.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp b/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp
index d48661d899..13da4a5317 100644
--- a/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp b/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp
index a3b9ebbd88..06196efe59 100644
--- a/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp b/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp
index 563fb8711d..cb74d8a214 100644
--- a/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp b/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp
index 95f846668b..ae9a6cb4ce 100644
--- a/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/image_generation/generation_config.hpp b/src/cpp/include/openvino/genai/image_generation/generation_config.hpp
index bd7073520a..6d6b2ed35a 100644
--- a/src/cpp/include/openvino/genai/image_generation/generation_config.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/generation_config.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/image_generation/image2image_pipeline.hpp b/src/cpp/include/openvino/genai/image_generation/image2image_pipeline.hpp
index c6c1f59c88..b086bd2aad 100644
--- a/src/cpp/include/openvino/genai/image_generation/image2image_pipeline.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/image2image_pipeline.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/image_generation/inpainting_pipeline.hpp b/src/cpp/include/openvino/genai/image_generation/inpainting_pipeline.hpp
index 03dd9468f7..1f8833d406 100644
--- a/src/cpp/include/openvino/genai/image_generation/inpainting_pipeline.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/inpainting_pipeline.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/image_generation/scheduler.hpp b/src/cpp/include/openvino/genai/image_generation/scheduler.hpp
index 25c5e07a2f..6e51f81917 100644
--- a/src/cpp/include/openvino/genai/image_generation/scheduler.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/scheduler.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp b/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp
index 7f96af49c2..281771fdfc 100644
--- a/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp b/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp
index 11797226eb..5e2e1716c5 100644
--- a/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp b/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp
index 3dc1fc0803..11787869cf 100644
--- a/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp b/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp
index 4acfd2ce9b..f0084d44b4 100644
--- a/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/llm_pipeline.hpp b/src/cpp/include/openvino/genai/llm_pipeline.hpp
index e7a7c40f9b..31b1ac1675 100644
--- a/src/cpp/include/openvino/genai/llm_pipeline.hpp
+++ b/src/cpp/include/openvino/genai/llm_pipeline.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/lora_adapter.hpp b/src/cpp/include/openvino/genai/lora_adapter.hpp
index b6b91bee20..4357b1be5b 100644
--- a/src/cpp/include/openvino/genai/lora_adapter.hpp
+++ b/src/cpp/include/openvino/genai/lora_adapter.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/perf_metrics.hpp b/src/cpp/include/openvino/genai/perf_metrics.hpp
index 659239be66..1ea0fb55f9 100644
--- a/src/cpp/include/openvino/genai/perf_metrics.hpp
+++ b/src/cpp/include/openvino/genai/perf_metrics.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/scheduler_config.hpp b/src/cpp/include/openvino/genai/scheduler_config.hpp
index 45d98c51bb..a551b866c6 100644
--- a/src/cpp/include/openvino/genai/scheduler_config.hpp
+++ b/src/cpp/include/openvino/genai/scheduler_config.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/streamer_base.hpp b/src/cpp/include/openvino/genai/streamer_base.hpp
index 724ec08f39..f286e896e5 100644
--- a/src/cpp/include/openvino/genai/streamer_base.hpp
+++ b/src/cpp/include/openvino/genai/streamer_base.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/tokenizer.hpp b/src/cpp/include/openvino/genai/tokenizer.hpp
index 548e4dc332..0a54d1da2a 100644
--- a/src/cpp/include/openvino/genai/tokenizer.hpp
+++ b/src/cpp/include/openvino/genai/tokenizer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/visibility.hpp b/src/cpp/include/openvino/genai/visibility.hpp
index 4a1a60bb61..1daaf89ec6 100644
--- a/src/cpp/include/openvino/genai/visibility.hpp
+++ b/src/cpp/include/openvino/genai/visibility.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/visual_language/perf_metrics.hpp b/src/cpp/include/openvino/genai/visual_language/perf_metrics.hpp
index 18476a5e7f..86750a13f6 100644
--- a/src/cpp/include/openvino/genai/visual_language/perf_metrics.hpp
+++ b/src/cpp/include/openvino/genai/visual_language/perf_metrics.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/visual_language/pipeline.hpp b/src/cpp/include/openvino/genai/visual_language/pipeline.hpp
index 43f8a9b8b3..8c3d380b0f 100644
--- a/src/cpp/include/openvino/genai/visual_language/pipeline.hpp
+++ b/src/cpp/include/openvino/genai/visual_language/pipeline.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/whisper_generation_config.hpp b/src/cpp/include/openvino/genai/whisper_generation_config.hpp
index 44d611923d..12ca32ebb2 100644
--- a/src/cpp/include/openvino/genai/whisper_generation_config.hpp
+++ b/src/cpp/include/openvino/genai/whisper_generation_config.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/include/openvino/genai/whisper_pipeline.hpp b/src/cpp/include/openvino/genai/whisper_pipeline.hpp
index 6078298c60..8ba6a6a8e1 100644
--- a/src/cpp/include/openvino/genai/whisper_pipeline.hpp
+++ b/src/cpp/include/openvino/genai/whisper_pipeline.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/attention_output.hpp b/src/cpp/src/attention_output.hpp
index b46ede11d2..602fcda1a0 100644
--- a/src/cpp/src/attention_output.hpp
+++ b/src/cpp/src/attention_output.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/block_manager.hpp b/src/cpp/src/block_manager.hpp
index 4ca263777b..0e1fe547d4 100644
--- a/src/cpp/src/block_manager.hpp
+++ b/src/cpp/src/block_manager.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/cache_eviction.cpp b/src/cpp/src/cache_eviction.cpp
index 94f3110cce..d6533daeb1 100644
--- a/src/cpp/src/cache_eviction.cpp
+++ b/src/cpp/src/cache_eviction.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "cache_eviction.hpp"
diff --git a/src/cpp/src/cache_eviction.hpp b/src/cpp/src/cache_eviction.hpp
index a32eb1ad0a..d2c8d0a553 100644
--- a/src/cpp/src/cache_eviction.hpp
+++ b/src/cpp/src/cache_eviction.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/cache_manager.hpp b/src/cpp/src/cache_manager.hpp
index 20d4c0c51c..13d75bac43 100644
--- a/src/cpp/src/cache_manager.hpp
+++ b/src/cpp/src/cache_manager.hpp
@@ -1,15 +1,49 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include
#include
-
#include "openvino/runtime/tensor.hpp"
-
#include "device_config.hpp"
+#ifndef _WIN32
+#include <sys/mman.h>
+#include "openvino/core/shape.hpp"
+
+
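+// Backs KV-cache tensors with anonymous private mmap memory: the OS commits
+// physical pages lazily on first touch and returns them already zero-filled,
+// so large caches are cheap to reserve and need no manual zeroing.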
+class TensorMmapAllocator {
+ size_t m_total_size;
+ void* m_data;
+
+public:
+ TensorMmapAllocator(size_t total_size) :
+ m_total_size(total_size) { }
+
+ void* allocate(size_t bytes, size_t) {
+ if (m_total_size == bytes) {
+ m_data = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ OPENVINO_ASSERT(m_data != MAP_FAILED);
+ return m_data;
+ }
+ throw std::runtime_error{"Unexpected number of bytes was requested to allocate."};
+ }
+
+ void deallocate(void*, size_t bytes, size_t) {
+ if (m_total_size != bytes) {
+ throw std::runtime_error{"Unexpected number of bytes was requested to deallocate."};
+ }
+ munmap(m_data, bytes);
+ }
+
+ bool is_equal(const TensorMmapAllocator& other) const noexcept {
+ return this == &other;
+ }
+};
+
+#endif
+
namespace ov::genai {
class CacheManager {
DeviceConfig m_device_config;
@@ -56,8 +90,17 @@ class CacheManager {
for (size_t decoder_layer_id = 0; decoder_layer_id < m_device_config.get_num_layers(); ++decoder_layer_id) {
ov::Shape value_cache_shape = set_first_dim_and_make_static(m_device_config.get_value_cache_shape(decoder_layer_id), num_kv_blocks);
ov::Shape key_cache_shape = set_first_dim_and_make_static(m_device_config.get_key_cache_shape(decoder_layer_id), num_kv_blocks);
+#ifdef _WIN32
ov::Tensor key_cache(m_device_config.get_cache_precision(), key_cache_shape);
ov::Tensor value_cache(m_device_config.get_cache_precision(), value_cache_shape);
+#else
+ auto key_size = ov::shape_size(key_cache_shape) * m_device_config.get_cache_precision().size();
+ auto value_size = ov::shape_size(value_cache_shape) * m_device_config.get_cache_precision().size();
+
+ ov::Tensor key_cache = ov::Tensor(m_device_config.get_cache_precision(), key_cache_shape, TensorMmapAllocator(key_size));
+ ov::Tensor value_cache = ov::Tensor(m_device_config.get_cache_precision(), value_cache_shape, TensorMmapAllocator(value_size));
+
+#endif
auto key_cache_roi_end = static_cast<char*>(key_cache.data());
auto value_cache_roi_end = static_cast<char*>(value_cache.data());
@@ -82,13 +125,14 @@ class CacheManager {
}
+#ifdef _WIN32
// Some optimizations like AVX2, AVX512, AMX require a minimal shape and
// perform multiplying by zero on the excess data. Uninitialized tensor data contain NAN's,
// so NAN * 0 returns non-zero invalid data.
// So we need to set zeros to all newly allocated tensors data.
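+    // On non-Windows builds the mmap-backed tensors above are already
+    // zero-filled by the OS, so this memset is only needed on Windows.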
std::memset(key_cache_roi_end, 0, key_cache.get_byte_size() - key_roi_size_byte);
std::memset(value_cache_roi_end, 0, value_cache.get_byte_size() - value_roi_size_byte);
-
+#endif
// set new cache tensors
if (m_key_cache.size() > decoder_layer_id) {
m_key_cache[decoder_layer_id] = key_cache;
diff --git a/src/cpp/src/cache_state_dumper.hpp b/src/cpp/src/cache_state_dumper.hpp
index ec6b63fdfb..e5292bf871 100644
--- a/src/cpp/src/cache_state_dumper.hpp
+++ b/src/cpp/src/cache_state_dumper.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
@@ -40,15 +40,15 @@ class CacheStateDumper {
* @param block_mgr A block manager owning the caches.
* @param sequence_groups Sequence groups currently utilizing the cache.
*/
-    void dump_cache_state(const BlockManager &block_mgr, const std::vector<SequenceGroup::Ptr> &sequence_groups,
+    void dump_cache_state(const std::shared_ptr<BlockManager> block_mgr, const std::vector<SequenceGroup::Ptr> &sequence_groups,
size_t dump_count) {
- for (size_t layer_idx = 0; layer_idx < block_mgr.m_num_layers; layer_idx++) {
+ for (size_t layer_idx = 0; layer_idx < block_mgr->m_num_layers; layer_idx++) {
auto per_layer_folder = get_per_layer_folder(layer_idx);
auto file_path = (per_layer_folder / (m_run_id + ".txt")).string();
std::ofstream out_stream(file_path, std::ios::out);
OPENVINO_ASSERT(out_stream.is_open());
- out_stream << block_mgr.m_allocator.m_total_num_blocks << std::endl;
+ out_stream << block_mgr->m_allocator.m_total_num_blocks << std::endl;
out_stream << sequence_groups.size() << std::endl;
for (const auto &seq_group_ptr: sequence_groups) {
out_stream << seq_group_ptr->get_request_id() << ' ';
@@ -57,7 +57,7 @@ class CacheStateDumper {
}
out_stream << std::endl;
}
- for (const auto &seq_id_and_blocks: block_mgr.m_block_table) {
+ for (const auto &seq_id_and_blocks: block_mgr->m_block_table) {
for (const auto &block: seq_id_and_blocks.second[layer_idx]) {
const size_t seq_id = seq_id_and_blocks.first;
out_stream << seq_id << " " << block->get_index() << " " << block->get_references_count()
@@ -70,7 +70,7 @@ class CacheStateDumper {
std::ofstream out_stream_cache_usage;
out_stream_cache_usage.open(cache_usage_file_path, std::ios::app);
- out_stream_cache_usage << dump_count << ' ' << block_mgr.get_used_percentage() << std::endl;
+ out_stream_cache_usage << dump_count << ' ' << block_mgr->get_used_percentage() << std::endl;
out_stream_cache_usage.flush();
dump_count++;
}
diff --git a/src/cpp/src/circular_buffer_queue.hpp b/src/cpp/src/circular_buffer_queue.hpp
index 859e4ec670..73e4644cb8 100644
--- a/src/cpp/src/circular_buffer_queue.hpp
+++ b/src/cpp/src/circular_buffer_queue.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/continuous_batching_adapter.hpp b/src/cpp/src/continuous_batching_adapter.hpp
index efcd2ec382..00928b342d 100644
--- a/src/cpp/src/continuous_batching_adapter.hpp
+++ b/src/cpp/src/continuous_batching_adapter.hpp
@@ -1,5 +1,5 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "llm_pipeline_base.hpp"
diff --git a/src/cpp/src/continuous_batching_impl.cpp b/src/cpp/src/continuous_batching_impl.cpp
index e778e55b93..788da2b015 100644
--- a/src/cpp/src/continuous_batching_impl.cpp
+++ b/src/cpp/src/continuous_batching_impl.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "text_callback_streamer.hpp"
@@ -33,6 +33,12 @@ ContinuousBatchingPipeline::ContinuousBatchingImpl::ContinuousBatchingImpl(
initialize_pipeline(model, scheduler_config, properties, device_config, core);
}
+ContinuousBatchingPipeline::ContinuousBatchingImpl::~ContinuousBatchingImpl() {
+ if (m_scheduler) {
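+        // The scheduler now holds the cache manager (see initialize_pipeline),
+        // so ask it to release its KV-cache resources explicitly on teardown.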
+ m_scheduler->release();
+ }
+}
+
void ContinuousBatchingPipeline::ContinuousBatchingImpl::_pull_awaiting_requests() {
std::lock_guard lock{m_awaiting_requests_mutex};
m_requests.insert(m_requests.end(), m_awaiting_requests.begin(), m_awaiting_requests.end());
@@ -61,7 +67,7 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::initialize_pipeline(
ov::InferRequest infer_request = compiled_model.create_infer_request();
// setup KV caches
-    m_cache_manager = std::make_shared<CacheManager>(device_config, infer_request, core);
+    std::shared_ptr<CacheManager> cache_manager = std::make_shared<CacheManager>(device_config, infer_request, core);
SchedulerConfig updated_config = scheduler_config;
// update KV blocks number in scheduler config
@@ -75,8 +81,7 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::initialize_pipeline(
// as it may lead to performance slowdown
can_use_partial_preemption = false;
}
-    m_scheduler = std::make_shared<Scheduler>(device_config.get_block_size(), m_cache_manager, updated_config, device_config.get_num_layers(), can_use_partial_preemption);
-
+    m_scheduler = std::make_shared<Scheduler>(device_config.get_block_size(), cache_manager, updated_config, device_config.get_num_layers(), can_use_partial_preemption);
// model runner
bool is_use_cache_eviction = m_scheduler->get_config().use_cache_eviction;
m_model_runner = std::make_shared<ModelRunner>(infer_request, m_scheduler->get_block_size(), device_config.get_num_layers(), is_use_cache_eviction);
@@ -158,11 +163,6 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::step() {
SequenceGroup::CPtr sequence_group = m_requests[seq_group_id];
m_batch_size += sequence_group->num_running_seqs();
}
-
- static ManualTimer copy_blocks_timer("scheduling");
- copy_blocks_timer.start();
- m_cache_manager->copy_blocks(scheduler_output.m_block_copy_map);
- copy_blocks_timer.end();
}
// if no tokens were scheduled, we are out of memory => free all requests and return
diff --git a/src/cpp/src/continuous_batching_impl.hpp b/src/cpp/src/continuous_batching_impl.hpp
index 78e6638fbc..8980038f73 100644
--- a/src/cpp/src/continuous_batching_impl.hpp
+++ b/src/cpp/src/continuous_batching_impl.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
@@ -13,7 +13,6 @@ namespace ov::genai {
class ContinuousBatchingPipeline::ContinuousBatchingImpl : public ContinuousBatchingPipeline::IContinuousBatchingPipeline {
protected:
std::shared_ptr<Scheduler> m_scheduler;
-    std::shared_ptr<CacheManager> m_cache_manager;
std::shared_ptr<ModelRunner> m_model_runner;
std::optional<AdapterController> m_adapter_controller;
std::shared_ptr<Sampler> m_sampler;
@@ -89,6 +88,8 @@ class ContinuousBatchingPipeline::ContinuousBatchingImpl : public ContinuousBatc
const ov::AnyMap& properties,
const ov::genai::GenerationConfig& generation_config,
bool is_validation_mode_enabled = false);
+
+ virtual ~ContinuousBatchingImpl();
GenerationHandle add_request(uint64_t request_id,
const ov::Tensor& input_ids,
diff --git a/src/cpp/src/continuous_batching_pipeline.cpp b/src/cpp/src/continuous_batching_pipeline.cpp
index d380d1c6c4..dab9fdff22 100644
--- a/src/cpp/src/continuous_batching_pipeline.cpp
+++ b/src/cpp/src/continuous_batching_pipeline.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/cpp/src/debug_utils.hpp b/src/cpp/src/debug_utils.hpp
index 415f8c0480..4c301493ce 100644
--- a/src/cpp/src/debug_utils.hpp
+++ b/src/cpp/src/debug_utils.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/device_config.hpp b/src/cpp/src/device_config.hpp
index fee6c7abd1..cbf3fe70c5 100644
--- a/src/cpp/src/device_config.hpp
+++ b/src/cpp/src/device_config.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp
index 25402e22e7..67682be787 100644
--- a/src/cpp/src/generation_config.cpp
+++ b/src/cpp/src/generation_config.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/cpp/src/generation_handle.cpp b/src/cpp/src/generation_handle.cpp
index 0f10a85a86..5d92c560e9 100644
--- a/src/cpp/src/generation_handle.cpp
+++ b/src/cpp/src/generation_handle.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/cpp/src/generation_stream.hpp b/src/cpp/src/generation_stream.hpp
index 518699ba36..d76d0cf7f4 100644
--- a/src/cpp/src/generation_stream.hpp
+++ b/src/cpp/src/generation_stream.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/icontinuous_batching.cpp b/src/cpp/src/icontinuous_batching.cpp
index 8fbb9619ea..78f8fda8f7 100644
--- a/src/cpp/src/icontinuous_batching.cpp
+++ b/src/cpp/src/icontinuous_batching.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "icontinuous_batching.hpp"
diff --git a/src/cpp/src/icontinuous_batching.hpp b/src/cpp/src/icontinuous_batching.hpp
index 81fff3f40c..11c9b67e69 100644
--- a/src/cpp/src/icontinuous_batching.hpp
+++ b/src/cpp/src/icontinuous_batching.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/diffusion_pipeline.hpp b/src/cpp/src/image_generation/diffusion_pipeline.hpp
index 86d8ba9009..e7daf75fff 100644
--- a/src/cpp/src/image_generation/diffusion_pipeline.hpp
+++ b/src/cpp/src/image_generation/diffusion_pipeline.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/flux_pipeline.hpp b/src/cpp/src/image_generation/flux_pipeline.hpp
index 458caf001b..fb6888b191 100644
--- a/src/cpp/src/image_generation/flux_pipeline.hpp
+++ b/src/cpp/src/image_generation/flux_pipeline.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/generation_config.cpp b/src/cpp/src/image_generation/generation_config.cpp
index ab098fabe5..bdae31901e 100644
--- a/src/cpp/src/image_generation/generation_config.cpp
+++ b/src/cpp/src/image_generation/generation_config.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/image_generation/generation_config.hpp"
diff --git a/src/cpp/src/image_generation/image2image_pipeline.cpp b/src/cpp/src/image_generation/image2image_pipeline.cpp
index 90c6e9fae4..8537e56ad5 100644
--- a/src/cpp/src/image_generation/image2image_pipeline.cpp
+++ b/src/cpp/src/image_generation/image2image_pipeline.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/cpp/src/image_generation/image_processor.cpp b/src/cpp/src/image_generation/image_processor.cpp
index 1e168da33b..3dabf888ab 100644
--- a/src/cpp/src/image_generation/image_processor.cpp
+++ b/src/cpp/src/image_generation/image_processor.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "image_generation/image_processor.hpp"
diff --git a/src/cpp/src/image_generation/image_processor.hpp b/src/cpp/src/image_generation/image_processor.hpp
index 8c62742006..7b6fda89ad 100644
--- a/src/cpp/src/image_generation/image_processor.hpp
+++ b/src/cpp/src/image_generation/image_processor.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/inpainting_pipeline.cpp b/src/cpp/src/image_generation/inpainting_pipeline.cpp
index a9179f5fd0..25ac267b69 100644
--- a/src/cpp/src/image_generation/inpainting_pipeline.cpp
+++ b/src/cpp/src/image_generation/inpainting_pipeline.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/cpp/src/image_generation/models/autoencoder_kl.cpp b/src/cpp/src/image_generation/models/autoencoder_kl.cpp
index ab8b87a13e..bcec125375 100644
--- a/src/cpp/src/image_generation/models/autoencoder_kl.cpp
+++ b/src/cpp/src/image_generation/models/autoencoder_kl.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/image_generation/autoencoder_kl.hpp"
@@ -22,6 +22,8 @@
namespace ov {
namespace genai {
+namespace {
+
class DiagonalGaussianDistribution {
public:
explicit DiagonalGaussianDistribution(ov::Tensor parameters)
@@ -64,6 +66,29 @@ class DiagonalGaussianDistribution {
ov::Tensor m_mean, m_std;
};
+// for BW compatibility with 2024.6.0
+ov::AnyMap handle_scale_factor(std::shared_ptr<ov::Model> model, const std::string& device, ov::AnyMap properties) {
+ std::cout << ov::Any(properties).as<std::string>() << std::endl;
+
+ auto it = properties.find("WA_INFERENCE_PRECISION_HINT");
+ ov::element::Type wa_inference_precision = it != properties.end() ? it->second.as<ov::element::Type>() : ov::element::undefined;
+ if (it != properties.end()) {
+ properties.erase(it);
+ }
+
+ const std::vector<std::string> activation_scale_factor_path = { "runtime_options", ov::hint::activations_scale_factor.name() };
+ const bool activation_scale_factor_defined = model->has_rt_info(activation_scale_factor_path);
+
+ // convert WA inference precision to actual inference precision if activation_scale_factor is not defined in IR
+ if (device.find("GPU") != std::string::npos && !activation_scale_factor_defined && wa_inference_precision != ov::element::undefined) {
+ properties[ov::hint::inference_precision.name()] = wa_inference_precision;
+ }
+
+ return properties;
+}
+
+} // namespace
+
size_t get_vae_scale_factor(const std::filesystem::path& vae_config_path) {
std::ifstream file(vae_config_path);
OPENVINO_ASSERT(file.is_open(), "Failed to open ", vae_config_path);
@@ -207,14 +232,14 @@ AutoencoderKL& AutoencoderKL::compile(const std::string& device, const ov::AnyMa
ov::Core core = utils::singleton_core();
if (m_encoder_model) {
- ov::CompiledModel encoder_compiled_model = core.compile_model(m_encoder_model, device, properties);
+ ov::CompiledModel encoder_compiled_model = core.compile_model(m_encoder_model, device, handle_scale_factor(m_encoder_model, device, properties));
ov::genai::utils::print_compiled_model_properties(encoder_compiled_model, "Auto encoder KL encoder model");
m_encoder_request = encoder_compiled_model.create_infer_request();
// release the original model
m_encoder_model.reset();
}
- ov::CompiledModel decoder_compiled_model = core.compile_model(m_decoder_model, device, properties);
+ ov::CompiledModel decoder_compiled_model = core.compile_model(m_decoder_model, device, handle_scale_factor(m_decoder_model, device, properties));
ov::genai::utils::print_compiled_model_properties(decoder_compiled_model, "Auto encoder KL decoder model");
m_decoder_request = decoder_compiled_model.create_infer_request();
// release the original model
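
The new handle_scale_factor() helper above is the consumer side of a backward-compatibility workaround: it strips the WA_INFERENCE_PRECISION_HINT key from the property map and, on GPU and only when the IR carries no activations_scale_factor in its runtime options, promotes it to a real inference precision hint. A minimal standalone sketch of the same rewrite, assuming std::map<std::string, std::string> in place of ov::AnyMap and a boolean in place of the model->has_rt_info() lookup (handle_scale_factor_sketch is a hypothetical name):

    #include <iostream>
    #include <map>
    #include <string>

    std::map<std::string, std::string> handle_scale_factor_sketch(
            bool activation_scale_factor_defined,   // stands in for model->has_rt_info(...)
            const std::string& device,
            std::map<std::string, std::string> properties) {
        auto it = properties.find("WA_INFERENCE_PRECISION_HINT");
        std::string wa_precision = it != properties.end() ? it->second : "";
        if (it != properties.end())
            properties.erase(it);  // the WA_* key must never reach core.compile_model()

        // Promote the workaround hint to a real INFERENCE_PRECISION_HINT only on GPU
        // and only when the IR does not define its own activations scale factor.
        if (device.find("GPU") != std::string::npos && !activation_scale_factor_defined && !wa_precision.empty())
            properties["INFERENCE_PRECISION_HINT"] = wa_precision;
        return properties;
    }

    int main() {
        auto props = handle_scale_factor_sketch(false, "GPU", {{"WA_INFERENCE_PRECISION_HINT", "f32"}});
        std::cout << props.at("INFERENCE_PRECISION_HINT") << '\n';  // prints "f32"
    }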
diff --git a/src/cpp/src/image_generation/models/clip_text_model.cpp b/src/cpp/src/image_generation/models/clip_text_model.cpp
index c49bd5f000..603dad61a1 100644
--- a/src/cpp/src/image_generation/models/clip_text_model.cpp
+++ b/src/cpp/src/image_generation/models/clip_text_model.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/image_generation/clip_text_model.hpp"
diff --git a/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp b/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp
index eb9289ab3e..eaf9d88db5 100644
--- a/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp
+++ b/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/image_generation/clip_text_model_with_projection.hpp"
diff --git a/src/cpp/src/image_generation/models/flux_transformer_2d_model.cpp b/src/cpp/src/image_generation/models/flux_transformer_2d_model.cpp
index 648eda8ff2..3bd5bc687d 100644
--- a/src/cpp/src/image_generation/models/flux_transformer_2d_model.cpp
+++ b/src/cpp/src/image_generation/models/flux_transformer_2d_model.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/image_generation/flux_transformer_2d_model.hpp"
diff --git a/src/cpp/src/image_generation/models/sd3_transformer_2d_model.cpp b/src/cpp/src/image_generation/models/sd3_transformer_2d_model.cpp
index 0a7865b07a..ab04e22a1e 100644
--- a/src/cpp/src/image_generation/models/sd3_transformer_2d_model.cpp
+++ b/src/cpp/src/image_generation/models/sd3_transformer_2d_model.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/image_generation/sd3_transformer_2d_model.hpp"
diff --git a/src/cpp/src/image_generation/models/t5_encoder_model.cpp b/src/cpp/src/image_generation/models/t5_encoder_model.cpp
index 32ae326eca..6b69b6d901 100644
--- a/src/cpp/src/image_generation/models/t5_encoder_model.cpp
+++ b/src/cpp/src/image_generation/models/t5_encoder_model.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/image_generation/t5_encoder_model.hpp"
diff --git a/src/cpp/src/image_generation/models/unet2d_condition_model.cpp b/src/cpp/src/image_generation/models/unet2d_condition_model.cpp
index ef35709761..26da143d5b 100644
--- a/src/cpp/src/image_generation/models/unet2d_condition_model.cpp
+++ b/src/cpp/src/image_generation/models/unet2d_condition_model.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/image_generation/unet2d_condition_model.hpp"
diff --git a/src/cpp/src/image_generation/models/unet_inference.hpp b/src/cpp/src/image_generation/models/unet_inference.hpp
index ae928aac30..639338901b 100644
--- a/src/cpp/src/image_generation/models/unet_inference.hpp
+++ b/src/cpp/src/image_generation/models/unet_inference.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp b/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp
index dd265e3eca..6bc86a5f06 100644
--- a/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp
+++ b/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp b/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp
index f63a8ea237..fd5d53e1d1 100644
--- a/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp
+++ b/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/numpy_utils.hpp b/src/cpp/src/image_generation/numpy_utils.hpp
index 48c5192621..785271347d 100644
--- a/src/cpp/src/image_generation/numpy_utils.hpp
+++ b/src/cpp/src/image_generation/numpy_utils.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/schedulers/ddim.cpp b/src/cpp/src/image_generation/schedulers/ddim.cpp
index 768ba56837..2c0199051f 100644
--- a/src/cpp/src/image_generation/schedulers/ddim.cpp
+++ b/src/cpp/src/image_generation/schedulers/ddim.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/cpp/src/image_generation/schedulers/ddim.hpp b/src/cpp/src/image_generation/schedulers/ddim.hpp
index 7e042cf03e..9f3ecbf5ce 100644
--- a/src/cpp/src/image_generation/schedulers/ddim.hpp
+++ b/src/cpp/src/image_generation/schedulers/ddim.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/schedulers/euler_ancestral_discrete.cpp b/src/cpp/src/image_generation/schedulers/euler_ancestral_discrete.cpp
index a63a073cfc..383fece163 100644
--- a/src/cpp/src/image_generation/schedulers/euler_ancestral_discrete.cpp
+++ b/src/cpp/src/image_generation/schedulers/euler_ancestral_discrete.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/cpp/src/image_generation/schedulers/euler_ancestral_discrete.hpp b/src/cpp/src/image_generation/schedulers/euler_ancestral_discrete.hpp
index 9d82c9a0a9..71fa13f01c 100644
--- a/src/cpp/src/image_generation/schedulers/euler_ancestral_discrete.hpp
+++ b/src/cpp/src/image_generation/schedulers/euler_ancestral_discrete.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/schedulers/euler_discrete.cpp b/src/cpp/src/image_generation/schedulers/euler_discrete.cpp
index fe5c185437..fcfc854606 100644
--- a/src/cpp/src/image_generation/schedulers/euler_discrete.cpp
+++ b/src/cpp/src/image_generation/schedulers/euler_discrete.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "image_generation/schedulers/euler_discrete.hpp"
diff --git a/src/cpp/src/image_generation/schedulers/euler_discrete.hpp b/src/cpp/src/image_generation/schedulers/euler_discrete.hpp
index 43620e7de4..0690258c9c 100644
--- a/src/cpp/src/image_generation/schedulers/euler_discrete.hpp
+++ b/src/cpp/src/image_generation/schedulers/euler_discrete.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/schedulers/flow_match_euler_discrete.cpp b/src/cpp/src/image_generation/schedulers/flow_match_euler_discrete.cpp
index 21d464d7f9..265a561869 100644
--- a/src/cpp/src/image_generation/schedulers/flow_match_euler_discrete.cpp
+++ b/src/cpp/src/image_generation/schedulers/flow_match_euler_discrete.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "image_generation/schedulers/flow_match_euler_discrete.hpp"
diff --git a/src/cpp/src/image_generation/schedulers/flow_match_euler_discrete.hpp b/src/cpp/src/image_generation/schedulers/flow_match_euler_discrete.hpp
index 6410790b92..6399290ff3 100644
--- a/src/cpp/src/image_generation/schedulers/flow_match_euler_discrete.hpp
+++ b/src/cpp/src/image_generation/schedulers/flow_match_euler_discrete.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/schedulers/ischeduler.hpp b/src/cpp/src/image_generation/schedulers/ischeduler.hpp
index a6f61b5343..2dadd59b1b 100644
--- a/src/cpp/src/image_generation/schedulers/ischeduler.hpp
+++ b/src/cpp/src/image_generation/schedulers/ischeduler.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/schedulers/lcm.cpp b/src/cpp/src/image_generation/schedulers/lcm.cpp
index a447d23027..b232e17c11 100644
--- a/src/cpp/src/image_generation/schedulers/lcm.cpp
+++ b/src/cpp/src/image_generation/schedulers/lcm.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/cpp/src/image_generation/schedulers/lcm.hpp b/src/cpp/src/image_generation/schedulers/lcm.hpp
index b9e1a55270..2dc159002a 100644
--- a/src/cpp/src/image_generation/schedulers/lcm.hpp
+++ b/src/cpp/src/image_generation/schedulers/lcm.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/schedulers/lms_discrete.cpp b/src/cpp/src/image_generation/schedulers/lms_discrete.cpp
index d8c3c23745..be7d7a96df 100644
--- a/src/cpp/src/image_generation/schedulers/lms_discrete.cpp
+++ b/src/cpp/src/image_generation/schedulers/lms_discrete.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "image_generation/schedulers/lms_discrete.hpp"
diff --git a/src/cpp/src/image_generation/schedulers/lms_discrete.hpp b/src/cpp/src/image_generation/schedulers/lms_discrete.hpp
index 53a3eb8c39..dd9e4125dc 100644
--- a/src/cpp/src/image_generation/schedulers/lms_discrete.hpp
+++ b/src/cpp/src/image_generation/schedulers/lms_discrete.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/schedulers/pndm.cpp b/src/cpp/src/image_generation/schedulers/pndm.cpp
index 4ddc099d0e..860b65be6f 100644
--- a/src/cpp/src/image_generation/schedulers/pndm.cpp
+++ b/src/cpp/src/image_generation/schedulers/pndm.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/cpp/src/image_generation/schedulers/pndm.hpp b/src/cpp/src/image_generation/schedulers/pndm.hpp
index 4e346f58b3..8089a46f09 100644
--- a/src/cpp/src/image_generation/schedulers/pndm.hpp
+++ b/src/cpp/src/image_generation/schedulers/pndm.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/schedulers/scheduler.cpp b/src/cpp/src/image_generation/schedulers/scheduler.cpp
index 868f6f05cf..2df1427f10 100644
--- a/src/cpp/src/image_generation/schedulers/scheduler.cpp
+++ b/src/cpp/src/image_generation/schedulers/scheduler.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/cpp/src/image_generation/schedulers/types.cpp b/src/cpp/src/image_generation/schedulers/types.cpp
index 5a9e5b6865..3d8721e25f 100644
--- a/src/cpp/src/image_generation/schedulers/types.cpp
+++ b/src/cpp/src/image_generation/schedulers/types.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "image_generation/schedulers/types.hpp"
diff --git a/src/cpp/src/image_generation/schedulers/types.hpp b/src/cpp/src/image_generation/schedulers/types.hpp
index bd5a8a7258..8ffe58670b 100644
--- a/src/cpp/src/image_generation/schedulers/types.hpp
+++ b/src/cpp/src/image_generation/schedulers/types.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp
index ca7b1e9ca5..017a52a2ff 100644
--- a/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp
+++ b/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
@@ -137,25 +137,17 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
set_scheduler(Scheduler::from_config(root_dir / "scheduler/scheduler_config.json"));
- // Temporary fix for GPU
- ov::AnyMap updated_properties = properties;
- if (device.find("GPU") != std::string::npos &&
- updated_properties.find("INFERENCE_PRECISION_HINT") == updated_properties.end()) {
- updated_properties["INFERENCE_PRECISION_HINT"] = ov::element::f32;
- }
-
 const std::string text_encoder = data["text_encoder"][1].get<std::string>();
 if (text_encoder == "CLIPTextModelWithProjection") {
 m_clip_text_encoder_1 =
- std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder", device, updated_properties);
+ std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder", device, properties);
} else {
OPENVINO_THROW("Unsupported '", text_encoder, "' text encoder type");
}
 const std::string text_encoder_2 = data["text_encoder_2"][1].get<std::string>();
 if (text_encoder_2 == "CLIPTextModelWithProjection") {
- m_clip_text_encoder_2 =
- std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder_2", device, updated_properties);
+ m_clip_text_encoder_2 = std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder_2", device, properties);
} else {
OPENVINO_THROW("Unsupported '", text_encoder_2, "' text encoder type");
}
@@ -164,7 +156,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
if (!text_encoder_3_json.is_null()) {
 const std::string text_encoder_3 = text_encoder_3_json.get<std::string>();
 if (text_encoder_3 == "T5EncoderModel") {
- m_t5_text_encoder = std::make_shared<T5EncoderModel>(root_dir / "text_encoder_3", device, updated_properties);
+ m_t5_text_encoder = std::make_shared<T5EncoderModel>(root_dir / "text_encoder_3", device, properties);
} else {
OPENVINO_THROW("Unsupported '", text_encoder_3, "' text encoder type");
}
@@ -180,9 +172,9 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
 const std::string vae = data["vae"][1].get<std::string>();
 if (vae == "AutoencoderKL") {
 if (m_pipeline_type == PipelineType::TEXT_2_IMAGE)
- m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_decoder", device, updated_properties);
+ m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_decoder", device, properties);
 else if (m_pipeline_type == PipelineType::IMAGE_2_IMAGE || m_pipeline_type == PipelineType::INPAINTING) {
- m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_encoder", root_dir / "vae_decoder", device, updated_properties);
+ m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_encoder", root_dir / "vae_decoder", device, properties);
} else {
OPENVINO_ASSERT("Unsupported pipeline type");
}
diff --git a/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp
index 3801c855fd..2c05bdb585 100644
--- a/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp
+++ b/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp
index c3ebcdf1f4..2553da93f9 100644
--- a/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp
+++ b/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
@@ -77,7 +77,7 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline {
ov::AnyMap updated_properties = properties;
if (device.find("GPU") != std::string::npos &&
updated_properties.find("INFERENCE_PRECISION_HINT") == updated_properties.end()) {
- updated_properties["INFERENCE_PRECISION_HINT"] = ov::element::f32;
+ updated_properties["WA_INFERENCE_PRECISION_HINT"] = ov::element::f32;
}
 const std::string vae = data["vae"][1].get<std::string>();
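
Together with handle_scale_factor() in autoencoder_kl.cpp above, this rename turns the forced GPU f32 hint into a two-step handshake: the SDXL pipeline records the request under a WA_* key, and the VAE's compile step decides whether to promote it to a real precision hint (the SD3 pipeline change above drops the forced hint altogether). A hedged producer-side sketch, again with std::map<std::string, std::string> standing in for ov::AnyMap and a hypothetical helper name:

    #include <map>
    #include <string>

    // Mirrors the change above: only request the workaround on GPU, and only when
    // the user did not pin a precision themselves.
    std::map<std::string, std::string> mark_precision_workaround(
            std::map<std::string, std::string> properties, const std::string& device) {
        if (device.find("GPU") != std::string::npos &&
            properties.find("INFERENCE_PRECISION_HINT") == properties.end())
            properties["WA_INFERENCE_PRECISION_HINT"] = "f32";  // consumed later by handle_scale_factor()
        return properties;
    }

    int main() { return mark_precision_workaround({}, "GPU").count("WA_INFERENCE_PRECISION_HINT") == 1 ? 0 : 1; }

Namespacing the request as WA_* keeps it distinguishable from a user-supplied hint and ensures it can be stripped before reaching core.compile_model(), which might otherwise reject an unknown property.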
diff --git a/src/cpp/src/image_generation/text2image_pipeline.cpp b/src/cpp/src/image_generation/text2image_pipeline.cpp
index 56b02a2e10..d4a5b0a77b 100644
--- a/src/cpp/src/image_generation/text2image_pipeline.cpp
+++ b/src/cpp/src/image_generation/text2image_pipeline.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/cpp/src/json_utils.hpp b/src/cpp/src/json_utils.hpp
index 4a4bb001df..c7af609242 100644
--- a/src/cpp/src/json_utils.hpp
+++ b/src/cpp/src/json_utils.hpp
@@ -1,5 +1,5 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/llm_pipeline.cpp b/src/cpp/src/llm_pipeline.cpp
index 11efed8b32..6ebef7bfba 100644
--- a/src/cpp/src/llm_pipeline.cpp
+++ b/src/cpp/src/llm_pipeline.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/cpp/src/llm_pipeline_stateful.cpp b/src/cpp/src/llm_pipeline_stateful.cpp
index 8451709092..2a53154c27 100644
--- a/src/cpp/src/llm_pipeline_stateful.cpp
+++ b/src/cpp/src/llm_pipeline_stateful.cpp
@@ -1,5 +1,5 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "llm_pipeline_stateful.hpp"
diff --git a/src/cpp/src/llm_pipeline_stateful.hpp b/src/cpp/src/llm_pipeline_stateful.hpp
index dbf8d89391..968c550a86 100644
--- a/src/cpp/src/llm_pipeline_stateful.hpp
+++ b/src/cpp/src/llm_pipeline_stateful.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp
index cffeedfc75..65f01ff193 100644
--- a/src/cpp/src/llm_pipeline_static.cpp
+++ b/src/cpp/src/llm_pipeline_static.cpp
@@ -34,6 +34,7 @@ namespace {
namespace opp = ov::pass::pattern;
class TransposeValueTensors : public ov::pass::MatcherPass {
public:
+ OPENVINO_MATCHER_PASS_RTTI("TransposeValueTensors");
struct Context {
 std::vector<std::shared_ptr<ov::opset13::Parameter>> new_params;
 std::vector<std::shared_ptr<ov::opset13::Parameter>> old_params;
@@ -95,7 +96,7 @@ class TransposeValueTensors : public ov::pass::MatcherPass {
class ScaledDotProductAttentionDecomposition : public ov::pass::MatcherPass {
public:
- OPENVINO_RTTI("ScaledDotProductAttentionDecomposition", "0");
+ OPENVINO_MATCHER_PASS_RTTI("ScaledDotProductAttentionDecomposition");
ScaledDotProductAttentionDecomposition() {
 auto pattern_node = ov::pass::pattern::wrap_type<ov::op::v13::ScaledDotProductAttention>();
@@ -769,9 +770,6 @@ void StatefulLLMPipeline::updateStatefulConfig(
rename_key(pipeline_config, "PREFILL_CONFIG", "NPUW_LLM_PREFILL_CONFIG");
rename_key(pipeline_config, "GENERATE_CONFIG", "NPUW_LLM_GENERATE_CONFIG");
rename_key(pipeline_config, "GENERATE_HINT", "NPUW_LLM_GENERATE_HINT");
-
- // Replace CACHE_DIR option if NPUW is enabled
- set_npuw_cache_dir(pipeline_config);
}
 std::shared_ptr<ov::CompiledModel> StatefulLLMPipeline::setupAndCompileModel(
@@ -1510,7 +1508,7 @@ LLMPipelineFactory::create(const std::filesystem::path& models_path,
const std::string& device,
const ov::AnyMap& config) {
auto properties = config;
- const auto pipeline_mode = str_to_pipeline(pop_or_default(properties, "STATIC_PIPELINE", std::string("STATELESS")));
+ const auto pipeline_mode = str_to_pipeline(pop_or_default(properties, "STATIC_PIPELINE", std::string("STATEFUL")));
if (pipeline_mode == StaticPipelineKind::STATEFUL) {
 return std::make_unique<StatefulLLMPipeline>(models_path, tokenizer, device, properties);
}
@@ -1531,7 +1529,7 @@ std::unique_ptr LLMPipelineFactory::create(const std::share
const ov::AnyMap& properties,
const ov::genai::GenerationConfig& generation_config) {
auto properties_copy = properties;
- const auto pipeline_mode = str_to_pipeline(pop_or_default(properties_copy, "STATIC_PIPELINE", std::string("STATELESS")));
+ const auto pipeline_mode = str_to_pipeline(pop_or_default(properties_copy, "STATIC_PIPELINE", std::string("STATEFUL")));
if (pipeline_mode == StaticPipelineKind::STATEFUL) {
 return std::make_unique<StatefulLLMPipeline>(model,
model_desc,
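
Both factory overloads flip the default of the NPU-specific STATIC_PIPELINE selector from STATELESS to STATEFUL, so callers who relied on the old default must now pass STATIC_PIPELINE=STATELESS explicitly. The sketch below illustrates the pop_or_default() pattern under the same assumptions as above (std::map in place of ov::AnyMap; the real helper lives elsewhere in the GenAI sources): the key is consumed so it never leaks to the plugin.

    #include <iostream>
    #include <map>
    #include <string>

    std::string pop_or_default(std::map<std::string, std::string>& props,
                               const std::string& key, const std::string& fallback) {
        auto it = props.find(key);
        if (it == props.end())
            return fallback;
        std::string value = it->second;
        props.erase(it);  // consumed: the selector never reaches the device plugin
        return value;
    }

    int main() {
        std::map<std::string, std::string> props;  // user passed no STATIC_PIPELINE
        std::cout << pop_or_default(props, "STATIC_PIPELINE", "STATEFUL") << '\n';  // STATEFUL

        props["STATIC_PIPELINE"] = "STATELESS";    // explicit opt-out keeps the old path
        std::cout << pop_or_default(props, "STATIC_PIPELINE", "STATEFUL") << '\n';  // STATELESS
    }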
diff --git a/src/cpp/src/lm_encoding.cpp b/src/cpp/src/lm_encoding.cpp
index 424349f5aa..22fb0b1c02 100644
--- a/src/cpp/src/lm_encoding.cpp
+++ b/src/cpp/src/lm_encoding.cpp
@@ -29,6 +29,23 @@ void update_position_ids(ov::Tensor&& position_ids, const ov::Tensor&& attention
}
}
+void update_3d_position_ids(ov::Tensor&& position_ids, const ov::Tensor& attention_mask, const int64_t rope_delta) {
+ const size_t batch_size = attention_mask.get_shape().at(0);
+ const size_t sequence_length = attention_mask.get_shape().at(1);
+ const size_t thw_dim_size = 3;
+
+ position_ids.set_shape({thw_dim_size, batch_size, 1});
+ int64_t* position_ids_data = position_ids.data<int64_t>();
+
+ int64_t pos_id = static_cast<int64_t>(sequence_length) - 1 + rope_delta;
+
+ for (size_t batch = 0; batch < batch_size; batch++) {
+ for (size_t dim = 0; dim < thw_dim_size; ++dim) {
+ position_ids_data[dim * batch_size + batch] = pos_id;
+ }
+ }
+}
+
void update_attention_mask_with_beams(ov::Tensor&& attention_mask, std::vector<int32_t> next_beams) {
ov::Tensor original_mask{ov::element::i64, attention_mask.get_shape()};
ov::Shape original_shape = original_mask.get_shape();
@@ -58,7 +75,8 @@ std::pair<EncodedResults, std::optional<int64_t>> get_lm_encoded_results(
 Sampler& sampler,
 std::vector<SequenceGroup::Ptr> sequence_groups,
 std::optional<ov::Tensor> position_ids,
- std::optional<EmbeddingsModel> m_embedding
+ std::optional<EmbeddingsModel> m_embedding,
+ std::optional<int64_t> rope_delta
) {
 std::vector<GenerationHandle> generations;
for (SequenceGroup::Ptr sequence_group : sequence_groups) {
@@ -196,7 +214,11 @@ std::pair> get_lm_encoded_results(
update_attention_mask_with_beams(m_llm.get_tensor("attention_mask"), next_beams);
if (position_ids.has_value()) {
- update_position_ids(m_llm.get_tensor("position_ids"), m_llm.get_tensor("attention_mask"));
+ if (position_ids->get_shape().size() == 3 && rope_delta.has_value()) {
+ update_3d_position_ids(m_llm.get_tensor("position_ids"), m_llm.get_tensor("attention_mask"), rope_delta.value());
+ } else {
+ update_position_ids(m_llm.get_tensor("position_ids"), m_llm.get_tensor("attention_mask"));
+ }
}
m_llm.set_tensor("beam_idx", ov::Tensor{ov::element::i32, {total_num_tokens}, next_beams.data()});
diff --git a/src/cpp/src/lm_encoding.hpp b/src/cpp/src/lm_encoding.hpp
index c31cffb9bc..56f6db5227 100644
--- a/src/cpp/src/lm_encoding.hpp
+++ b/src/cpp/src/lm_encoding.hpp
@@ -10,7 +10,7 @@ namespace genai {
 std::pair<EncodedResults, std::optional<int64_t>> get_lm_encoded_results(ov::InferRequest& m_llm, const ov::Tensor& input_ids, const ov::Tensor& attention_mask,
 const std::shared_ptr<StreamerBase>& streamer_ptr, Sampler& sampler, std::vector<SequenceGroup::Ptr> sequence_groups,
- std::optional<ov::Tensor> position_ids, std::optional<EmbeddingsModel> m_embedding);
+ std::optional<ov::Tensor> position_ids, std::optional<EmbeddingsModel> m_embedding, std::optional<int64_t> rope_delta = std::nullopt);
}
}
diff --git a/src/cpp/src/logit_processor.hpp b/src/cpp/src/logit_processor.hpp
index a81a33017d..a9446591cf 100644
--- a/src/cpp/src/logit_processor.hpp
+++ b/src/cpp/src/logit_processor.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/lora_adapter.cpp b/src/cpp/src/lora_adapter.cpp
index e060e55160..2fd487f37f 100644
--- a/src/cpp/src/lora_adapter.cpp
+++ b/src/cpp/src/lora_adapter.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
@@ -473,7 +473,7 @@ struct LoRAWeightStateGetter {
class LoRATransformBase : public ov::pass::MatcherPass {
public:
- OPENVINO_RTTI("LoRATransformBase");
+ OPENVINO_MATCHER_PASS_RTTI("LoRATransformBase");
LoRATransformBase(const LoRAWeightByNodeGetter& lora_weight_getter) {
register_matcher(
@@ -693,7 +693,7 @@ class LoRAFuseTransform : public LoRATransformBase {
public:
- OPENVINO_RTTI("LoRAFuseTransform");
+ OPENVINO_RTTI("LoRAFuseTransform", "genai", LoRATransformBase);
LoRAFuseTransform(const LoRAWeightByNodeGetter& lora_weight_getter, const std::string& device_for_fusion = "CPU") :
LoRATransformBase(lora_weight_getter),
@@ -763,7 +763,7 @@ class LoRAFuseTransform : public LoRATransformBase {
class LoRASeparateTransform : public LoRATransformBase {
public:
- OPENVINO_RTTI("LoRASeparateTransform");
+ OPENVINO_RTTI("LoRASeparateTransform", "genai", LoRATransformBase);
LoRASeparateTransform(const LoRAWeightByNodeGetter& lora_getter) : LoRATransformBase(lora_getter) {}
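
The RTTI changes here, in make_tokenizer_stateful.hpp and in llm_pipeline_static.cpp migrate to the dedicated pass macros that ship with OpenVINO 2025.0, while subclasses of the intermediate LoRATransformBase keep OPENVINO_RTTI and now name their parent class in the macro. A declaration-level illustration (MyMatcherPass is hypothetical; building it requires the OpenVINO pass headers):

    #include "openvino/pass/matcher_pass.hpp"

    // Leaf matcher passes register RTTI with the dedicated macro, as the
    // transforms above now do; classes with a custom base use the multi-argument
    // OPENVINO_RTTI form instead, naming that base explicitly.
    class MyMatcherPass : public ov::pass::MatcherPass {
    public:
        OPENVINO_MATCHER_PASS_RTTI("MyMatcherPass");
    };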
diff --git a/src/cpp/src/lora_names_mapping.cpp b/src/cpp/src/lora_names_mapping.cpp
index 0be3e740b4..03bf2ed93c 100644
--- a/src/cpp/src/lora_names_mapping.cpp
+++ b/src/cpp/src/lora_names_mapping.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
// Content of this file is a C++ port of the name mapping for LoRA tensors from HuggingFace diffusers/loaders/lora_conversion_utils.py
diff --git a/src/cpp/src/make_tokenizer_stateful.cpp b/src/cpp/src/make_tokenizer_stateful.cpp
index 4685b0e715..547ecdac92 100644
--- a/src/cpp/src/make_tokenizer_stateful.cpp
+++ b/src/cpp/src/make_tokenizer_stateful.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "make_tokenizer_stateful.hpp"
diff --git a/src/cpp/src/make_tokenizer_stateful.hpp b/src/cpp/src/make_tokenizer_stateful.hpp
index 307c6199c8..9ad06ae07a 100644
--- a/src/cpp/src/make_tokenizer_stateful.hpp
+++ b/src/cpp/src/make_tokenizer_stateful.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/op/constant.hpp"
@@ -34,7 +34,7 @@ namespace genai {
**/
class MakeCombineSegmentsSatateful : public ov::pass::ModelPass {
public:
- OPENVINO_RTTI("MakeCombineSegmentsSatateful", "0");
+ OPENVINO_MODEL_PASS_RTTI("MakeCombineSegmentsSatateful");
bool run_on_model(const std::shared_ptr& model) override;
};
@@ -70,7 +70,7 @@ class MakeCombineSegmentsSatateful : public ov::pass::ModelPass {
**/
class MakeVocabDecoderSatateful : public ov::pass::ModelPass {
public:
- OPENVINO_RTTI("MakeVocabDecoderSatateful", "0");
+ OPENVINO_MODEL_PASS_RTTI("MakeVocabDecoderSatateful");
bool run_on_model(const std::shared_ptr& model) override;
};
diff --git a/src/cpp/src/model_runner.hpp b/src/cpp/src/model_runner.hpp
index 27eee9e27d..d12e69052b 100644
--- a/src/cpp/src/model_runner.hpp
+++ b/src/cpp/src/model_runner.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/perf_metrics.cpp b/src/cpp/src/perf_metrics.cpp
index f4ec0eb49a..a84b83dd2f 100644
--- a/src/cpp/src/perf_metrics.cpp
+++ b/src/cpp/src/perf_metrics.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/perf_metrics.hpp"
diff --git a/src/cpp/src/prompt_lookup/continuous_batching_for_prompt_lookup.cpp b/src/cpp/src/prompt_lookup/continuous_batching_for_prompt_lookup.cpp
index d6e2746244..d01c863549 100644
--- a/src/cpp/src/prompt_lookup/continuous_batching_for_prompt_lookup.cpp
+++ b/src/cpp/src/prompt_lookup/continuous_batching_for_prompt_lookup.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "continuous_batching_for_prompt_lookup.hpp"
diff --git a/src/cpp/src/prompt_lookup/continuous_batching_for_prompt_lookup.hpp b/src/cpp/src/prompt_lookup/continuous_batching_for_prompt_lookup.hpp
index e439fd03e1..fc4942701e 100644
--- a/src/cpp/src/prompt_lookup/continuous_batching_for_prompt_lookup.hpp
+++ b/src/cpp/src/prompt_lookup/continuous_batching_for_prompt_lookup.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/prompt_lookup/prompt_lookup_impl.cpp b/src/cpp/src/prompt_lookup/prompt_lookup_impl.cpp
index 68aada91dd..41c3e6370f 100644
--- a/src/cpp/src/prompt_lookup/prompt_lookup_impl.cpp
+++ b/src/cpp/src/prompt_lookup/prompt_lookup_impl.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "prompt_lookup_impl.hpp"
diff --git a/src/cpp/src/prompt_lookup/prompt_lookup_impl.hpp b/src/cpp/src/prompt_lookup/prompt_lookup_impl.hpp
index e19042f44e..1499bcc76e 100644
--- a/src/cpp/src/prompt_lookup/prompt_lookup_impl.hpp
+++ b/src/cpp/src/prompt_lookup/prompt_lookup_impl.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/scheduler.hpp b/src/cpp/src/scheduler.hpp
index 0057b19329..86f705f759 100644
--- a/src/cpp/src/scheduler.hpp
+++ b/src/cpp/src/scheduler.hpp
@@ -1,5 +1,5 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
@@ -13,13 +13,14 @@
#include "block_manager.hpp"
#include "sequence_group.hpp"
#include "cache_manager.hpp"
+#include "timer.hpp"
namespace ov::genai {
class Scheduler {
bool m_can_use_partial_preemption;
SchedulerConfig m_config;
- BlockManager m_block_manager;
+ std::shared_ptr<BlockManager> m_block_manager;
friend class CacheStateDumper;
bool m_dynamic_memory_allocation = false;
@@ -33,8 +34,6 @@ class Scheduler {
struct Output {
// IDs of scheduled groups
 std::vector<uint64_t> m_scheduled_sequence_groups_ids;
- // map of src -> dst blocks copies, which need to be performed by CacheManager
- std::map<size_t, std::list<size_t>> m_block_copy_map;
// block tables for scheduled sequences per each attention layer in the model
 std::map<uint64_t, std::vector<KVCacheBlock::Ptr>> m_block_tables;
// total number of scheduled tokens
@@ -48,26 +47,32 @@ class Scheduler {
 explicit Scheduler(size_t block_size, std::shared_ptr<CacheManager> cache_manager, const SchedulerConfig & config = {}, size_t num_layers = 1, bool can_use_partial_preemption = true) :
m_cache_manager(cache_manager),
m_can_use_partial_preemption(can_use_partial_preemption),
- m_config(config),
- m_block_manager(m_config.num_kv_blocks, m_config.enable_prefix_caching, block_size, num_layers) {
-
+ m_config(config) {
+ m_block_manager = std::make_shared<BlockManager>(m_config.num_kv_blocks, m_config.enable_prefix_caching, block_size, num_layers);
OPENVINO_ASSERT(num_layers != 0, "num_layers must be non-zero");
}
+ void release() {
+ m_cache_manager.reset();
+ m_block_manager.reset();
+ }
+
Output schedule(std::vector& sequence_groups) {
Output scheduler_output;
+ // map of src -> dst block copies, which need to be performed by CacheManager
+ std::map<size_t, std::list<size_t>> block_copy_map;
 // free some blocks taken by non-confirmed candidates in SD / prompt look-up
clean_empty_blocks(sequence_groups);
- if (m_block_manager.get_total_number_of_kv_blocks() == 0) {
+ if (m_block_manager->get_total_number_of_kv_blocks() == 0) {
_initialize_cache(sequence_groups);
}
if (m_config.dynamic_split_fuse) {
// deepspeed-mii case
// generation phase is always scheduled first
- _schedule_generate_phase_dynamic_split_fuse(sequence_groups, scheduler_output);
+ _schedule_generate_phase_dynamic_split_fuse(sequence_groups, scheduler_output, block_copy_map);
// some tokens from generation prompt are also scheduled
_schedule_prompt_phase_dynamic_split_fuse(sequence_groups, scheduler_output);
} else {
@@ -78,13 +83,19 @@ class Scheduler {
if (!scheduler_output.is_prompt) {
 // prompt sequences are not scheduled => schedule the generation phase via the dynamic_split_fuse implementation
- _schedule_generate_phase_dynamic_split_fuse(sequence_groups, scheduler_output);
+ _schedule_generate_phase_dynamic_split_fuse(sequence_groups, scheduler_output, block_copy_map);
}
}
- m_cache_manager->allocate_cache_if_needed(m_block_manager.get_total_number_of_kv_blocks());
+ m_cache_manager->allocate_cache_if_needed(m_block_manager->get_total_number_of_kv_blocks());
_clear_waiting_sequences(sequence_groups);
- scheduler_output.m_cache_usage = m_block_manager.get_used_percentage();
+ scheduler_output.m_cache_usage = m_block_manager->get_used_percentage();
+
+ static ManualTimer copy_blocks_timer("copy block");
+ copy_blocks_timer.start();
+ m_cache_manager->copy_blocks(block_copy_map);
+ copy_blocks_timer.end();
+
return scheduler_output;
}
@@ -94,31 +105,31 @@ class Scheduler {
*/
 void clean_empty_blocks(std::vector<SequenceGroup::Ptr>& seq_groups) {
for (const auto& seq_group : seq_groups)
- m_block_manager.free_empty_physical_blocks(seq_group);
+ m_block_manager->free_empty_physical_blocks(seq_group);
}
 const std::vector<KVCacheBlock::Ptr>& get_block_tables(const Sequence& seq) const {
- return m_block_manager.get_block_tables(seq.get_id());
+ return m_block_manager->get_block_tables(seq.get_id());
}
const size_t get_block_size() const {
- return m_block_manager.get_block_size();
+ return m_block_manager->get_block_size();
}
const bool has_block_table(uint64_t seq_id) {
- return m_block_manager.has_block_table(seq_id);
+ return m_block_manager->has_block_table(seq_id);
}
void free_sequence(uint64_t seq_id) {
- m_block_manager.free_sequence(seq_id);
+ m_block_manager->free_sequence(seq_id);
}
void fork_sequence(uint64_t parent_id, uint64_t child_id) {
- m_block_manager.fork_sequence(parent_id, child_id);
+ m_block_manager->fork_sequence(parent_id, child_id);
}
void restore_cached_blocks(const SequenceGroup::Ptr& sequence_group) {
- m_block_manager.restore_cached_blocks(sequence_group);
+ m_block_manager->restore_cached_blocks(sequence_group);
}
const SchedulerConfig& get_config() const {
@@ -126,7 +137,7 @@ class Scheduler {
}
 void free_blocks_from_sequence(size_t seq_id, const std::vector<std::set<size_t>>& per_layer_logical_block_indices_to_free) {
- m_block_manager.free_blocks_from_sequence(seq_id, per_layer_logical_block_indices_to_free);
+ m_block_manager->free_blocks_from_sequence(seq_id, per_layer_logical_block_indices_to_free);
}
private:
@@ -143,31 +154,31 @@ class Scheduler {
bool _preempt_by_recompute(SequenceGroup::Ptr sequence_group, size_t blocks_needed) {
size_t processed_tokens = sequence_group->get_num_processed_tokens();
- size_t prev_blocks_count = m_block_manager.num_free_blocks();
+ size_t prev_blocks_count = m_block_manager->num_free_blocks();
size_t preempted_tokens = 0;
- size_t num_blocks_occupied_by_sequence = m_block_manager.get_number_of_blocks_occupied_by_sequence(sequence_group);
+ size_t num_blocks_occupied_by_sequence = m_block_manager->get_number_of_blocks_occupied_by_sequence(sequence_group);
bool was_evicted_from = (sequence_group->get_num_evicted_tokens() != 0);
if (num_blocks_occupied_by_sequence <= blocks_needed || !m_can_use_partial_preemption || was_evicted_from) {
auto sequences = sequence_group->get_not_finished_sequences();
for (size_t s = 0; s < sequences.size(); ++s) {
auto seq_id = sequences[s]->get_id();
- m_block_manager.free_sequence(seq_id);
+ m_block_manager->free_sequence(seq_id);
}
sequence_group->preempt_tokens(processed_tokens);
if (was_evicted_from) {
sequence_group->reset_eviction_token_count();
}
sequence_group->set_waiting();
- return m_block_manager.num_free_blocks() > prev_blocks_count;
+ return m_block_manager->num_free_blocks() > prev_blocks_count;
}
size_t logical_blocks_released;
if (sequence_group->get_sampling_parameters().is_beam_search()) {
- logical_blocks_released = m_block_manager.free_partially_beam_search_group(sequence_group, blocks_needed);
+ logical_blocks_released = m_block_manager->free_partially_beam_search_group(sequence_group, blocks_needed);
}
else {
- logical_blocks_released = m_block_manager.free_group_partially(sequence_group, blocks_needed);
+ logical_blocks_released = m_block_manager->free_group_partially(sequence_group, blocks_needed);
}
size_t block_size = get_block_size();
@@ -184,14 +195,14 @@ class Scheduler {
preempted_tokens = processed_tokens;
for (auto sequence: sequence_group->get_not_finished_sequences()) {
auto seq_id = sequence->get_id();
- if (m_block_manager.has_block_table(seq_id)) {
- m_block_manager.free_sequence(seq_id);
+ if (m_block_manager->has_block_table(seq_id)) {
+ m_block_manager->free_sequence(seq_id);
}
}
}
sequence_group->preempt_tokens(preempted_tokens);
sequence_group->set_waiting();
- return m_block_manager.num_free_blocks() > prev_blocks_count;
+ return m_block_manager->num_free_blocks() > prev_blocks_count;
}
 static size_t _get_low_priority_sequence_group_id(const std::vector<SequenceGroup::Ptr>& sequence_groups) {
@@ -212,7 +223,7 @@ class Scheduler {
SequenceGroup::Ptr sequence_group = sequence_groups[sequence_group_id];
// check whether current sequence requires a new slot / block
- while (!m_block_manager.can_append_slots(sequence_group)) {
+ while (!m_block_manager->can_append_slots(sequence_group)) {
// let's run a sequence for eviction
size_t evicted_sequence_group_id = _get_low_priority_sequence_group_id(sequence_groups);
@@ -220,7 +231,7 @@ class Scheduler {
 // we have a cycle when the current group needs to evict itself to be in a running state
break;
}
- size_t blocks_needed = m_block_manager.required_blocks_count(sequence_group);
+ size_t blocks_needed = m_block_manager->required_blocks_count(sequence_group);
if (!_preempt_by_recompute(sequence_groups[evicted_sequence_group_id], blocks_needed)){
break;
}
@@ -259,12 +270,12 @@ class Scheduler {
size_t available_slots = currently_allocated_token_slots - occupied_token_slots,
required_slots = num_scheduled_tokens > available_slots ? num_scheduled_tokens - available_slots : 0;
size_t num_required_blocks = (required_slots + block_size - 1) / block_size;
- while (num_required_blocks > m_block_manager.num_free_blocks()) {
+ while (num_required_blocks > m_block_manager->num_free_blocks()) {
if (!_try_increase_cache()) {
break;
}
}
- size_t num_scheduled_blocks = std::min(num_required_blocks, m_block_manager.num_free_blocks());
+ size_t num_scheduled_blocks = std::min(num_required_blocks, m_block_manager->num_free_blocks());
 // some scheduled blocks can be not fully occupied, so we need to take min between num_scheduled_blocks
// and total "scheduled capacity"
num_scheduled_tokens = std::min(num_scheduled_tokens, available_slots + num_scheduled_blocks * block_size);
@@ -272,14 +283,14 @@ class Scheduler {
if (num_scheduled_tokens > 0) {
// allocate KV blocks if required
if (num_scheduled_blocks > 0)
- m_block_manager.allocate(sequence, num_scheduled_blocks, sequence_group->get_prompt_ids());
+ m_block_manager->allocate(sequence, num_scheduled_blocks, sequence_group->get_prompt_ids());
// and schedule tokens
sequence_group->schedule_tokens(num_scheduled_tokens);
// add information to scheduler_output
{
scheduler_output.m_scheduled_sequence_groups_ids.push_back(sequence_group_id);
- scheduler_output.m_block_tables[seq_id] = m_block_manager.get_block_tables(seq_id);
+ scheduler_output.m_block_tables[seq_id] = m_block_manager->get_block_tables(seq_id);
scheduler_output.m_total_num_scheduled_tokens += num_scheduled_tokens * num_running_seqs;
}
}
@@ -291,7 +302,9 @@ class Scheduler {
}
}
- void _schedule_generate_phase_dynamic_split_fuse(const std::vector<SequenceGroup::Ptr>& sequence_groups, Output& scheduler_output) {
+ void _schedule_generate_phase_dynamic_split_fuse(const std::vector<SequenceGroup::Ptr>& sequence_groups,
+ Output& scheduler_output,
+ std::map<size_t, std::list<size_t>>& block_copy_map) {
for (size_t sequence_group_id = 0; sequence_group_id < sequence_groups.size(); ++sequence_group_id) {
SequenceGroup::Ptr sequence_group = sequence_groups[sequence_group_id];
// Note, that can_generate_tokens will mix preempted sequence groups
@@ -316,7 +329,7 @@ class Scheduler {
size_t num_scheduled_tokens_per_seq = std::min(available_tokens_per_seq_in_megabatch, num_available_tokens_per_seq);
sequence_group->schedule_tokens(num_scheduled_tokens_per_seq);
- while (!m_block_manager.can_append_slots(sequence_group)){
+ while (!m_block_manager->can_append_slots(sequence_group)){
if (!_try_increase_cache()) {
break;
}
@@ -325,13 +338,13 @@ class Scheduler {
_apply_preemption(sequence_group_id, sequence_groups);
 // if we can't preempt any more sequences, clear scheduled tokens and move to the next sequence
- if (!m_block_manager.can_append_slots(sequence_group)) {
+ if (!m_block_manager->can_append_slots(sequence_group)) {
sequence_group->clear_scheduled_tokens();
continue;
}
// allocate new slots
- std::map<size_t, std::list<size_t>> copy_blocks_map = m_block_manager.append_slots(sequence_group);
+ std::map<size_t, std::list<size_t>> copy_blocks_map = m_block_manager->append_slots(sequence_group);
// add information to scheduler_output
{
@@ -342,7 +355,7 @@ class Scheduler {
// block tables for each running sequence within a group
 std::vector<Sequence::Ptr> running_seqs = sequence_group->get_running_sequences();
for (const auto & seq : sequence_group->get_running_sequences()) {
- scheduler_output.m_block_tables[seq->get_id()] = m_block_manager.get_block_tables(seq->get_id());
+ scheduler_output.m_block_tables[seq->get_id()] = m_block_manager->get_block_tables(seq->get_id());
}
// merge copy_blocks
@@ -350,7 +363,7 @@ class Scheduler {
size_t src_index = src_dst.first;
 const std::list<size_t>& dst_indexes = src_dst.second;
for (const auto dst_index : dst_indexes)
- scheduler_output.m_block_copy_map[src_index].push_back(dst_index);
+ block_copy_map[src_index].push_back(dst_index);
}
}
@@ -403,12 +416,12 @@ class Scheduler {
// apply KV cache limitations
size_t block_size = get_block_size();
const size_t num_required_blocks = (sequence_len + block_size - 1) / block_size;
- while (!m_block_manager.can_allocate_blocks(num_required_blocks)){
+ while (!m_block_manager->can_allocate_blocks(num_required_blocks)){
if (!_try_increase_cache()) {
break;
}
}
- if (!m_block_manager.can_allocate_blocks(num_required_blocks))
+ if (!m_block_manager->can_allocate_blocks(num_required_blocks))
break;
// add scheduling information
@@ -419,13 +432,13 @@ class Scheduler {
sequence_group->schedule_tokens(sequence_len);
// allocate KV blocks
- m_block_manager.append_slots(sequence_group);
+ m_block_manager->append_slots(sequence_group);
// add information to scheduler_output
{
scheduler_output.m_scheduled_sequence_groups_ids.push_back(sequence_group_id);
uint64_t seq_id = sequence_group->get_running_sequences()[0]->get_id();
- scheduler_output.m_block_tables[seq_id] = m_block_manager.get_block_tables(seq_id);
+ scheduler_output.m_block_tables[seq_id] = m_block_manager->get_block_tables(seq_id);
scheduler_output.m_total_num_scheduled_tokens += sequence_len;
}
@@ -481,7 +494,7 @@ class Scheduler {
auto seq_length = sequence_groups[idx]->get_prompt_len() * m_kv_blocks_initial_multiplier;
auto gen_config = sequence_groups[idx]->get_sampling_parameters();
seq_length = std::min(seq_length, sequence_groups[idx]->get_prompt_len() + gen_config.get_max_new_tokens(sequence_groups[idx]->get_prompt_len()));
- size_t blocks_num = std::ceil((float)seq_length / m_block_manager.get_block_size());
+ size_t blocks_num = std::ceil((float)seq_length / m_block_manager->get_block_size());
if (gen_config.is_beam_search()) {
blocks_num *= gen_config.num_beams;
} else if (gen_config.is_multinomial()) {
@@ -489,7 +502,7 @@ class Scheduler {
}
blocks_sum += blocks_num;
}
- m_block_manager.increase_kv_blocks_number(blocks_sum);
+ m_block_manager->increase_kv_blocks_number(blocks_sum);
m_dynamic_memory_allocation = true;
}
@@ -499,21 +512,21 @@ class Scheduler {
}
auto device_config = m_cache_manager->get_device_config();
auto device = device_config->get_device();
- size_t current_num_of_kv_blocks = m_block_manager.get_total_number_of_kv_blocks();
+ size_t current_num_of_kv_blocks = m_block_manager->get_total_number_of_kv_blocks();
size_t new_blocks_num = current_num_of_kv_blocks * m_cache_growth_factor;
if (device.find("GPU") == std::string::npos) {
- m_block_manager.increase_kv_blocks_number(new_blocks_num);
+ m_block_manager->increase_kv_blocks_number(new_blocks_num);
}
else {
size_t available_gpu_memory = _get_available_gpu_memory();
size_t required_memory = (new_blocks_num - current_num_of_kv_blocks) * device_config->get_block_size_in_bytes();
if (required_memory <= available_gpu_memory) {
- m_block_manager.increase_kv_blocks_number(new_blocks_num);
+ m_block_manager->increase_kv_blocks_number(new_blocks_num);
} else {
size_t possible_blocks_to_add = available_gpu_memory / device_config->get_block_size_in_bytes();
if (possible_blocks_to_add > 0) {
- m_block_manager.increase_kv_blocks_number(current_num_of_kv_blocks + possible_blocks_to_add);
+ m_block_manager->increase_kv_blocks_number(current_num_of_kv_blocks + possible_blocks_to_add);
}
else {
return false;
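
With this change the src -> dst copy map is no longer part of the scheduler's Output: it is a schedule()-local map, merged from the per-group results of append_slots() and handed to CacheManager::copy_blocks() exactly once per scheduling step (timed by the new ManualTimer). A standalone sketch of the merge, assuming plain standard containers and hypothetical group names:

    #include <cstddef>
    #include <iostream>
    #include <list>
    #include <map>

    int main() {
        std::map<std::size_t, std::list<std::size_t>> block_copy_map;            // schedule()-local now
        std::map<std::size_t, std::list<std::size_t>> from_group_a{{0, {4}}};    // as if returned by append_slots()
        std::map<std::size_t, std::list<std::size_t>> from_group_b{{0, {5}}, {2, {6}}};

        // Fold each per-group map into the single schedule-local map.
        for (const auto* per_group : {&from_group_a, &from_group_b})
            for (const auto& [src, dsts] : *per_group)
                for (std::size_t dst : dsts)
                    block_copy_map[src].push_back(dst);

        for (const auto& [src, dsts] : block_copy_map) {
            std::cout << src << " ->";
            for (std::size_t dst : dsts) std::cout << ' ' << dst;
            std::cout << '\n';  // 0 -> 4 5, then 2 -> 6
        }
    }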
diff --git a/src/cpp/src/sequence_group.cpp b/src/cpp/src/sequence_group.cpp
index 854fc85777..7b9265db1a 100644
--- a/src/cpp/src/sequence_group.cpp
+++ b/src/cpp/src/sequence_group.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/cpp/src/speculative_decoding/continuous_batching_for_speculative_decoding_impl.cpp b/src/cpp/src/speculative_decoding/continuous_batching_for_speculative_decoding_impl.cpp
index e5468baa9d..dccc633d4d 100644
--- a/src/cpp/src/speculative_decoding/continuous_batching_for_speculative_decoding_impl.cpp
+++ b/src/cpp/src/speculative_decoding/continuous_batching_for_speculative_decoding_impl.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "continuous_batching_for_speculative_decoding_impl.hpp"
diff --git a/src/cpp/src/speculative_decoding/continuous_batching_for_speculative_decoding_impl.hpp b/src/cpp/src/speculative_decoding/continuous_batching_for_speculative_decoding_impl.hpp
index 4d476b8cbc..3777d9b87b 100644
--- a/src/cpp/src/speculative_decoding/continuous_batching_for_speculative_decoding_impl.hpp
+++ b/src/cpp/src/speculative_decoding/continuous_batching_for_speculative_decoding_impl.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp
index 8b2730a62b..5483523698 100644
--- a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp
+++ b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "text_callback_streamer.hpp"
diff --git a/src/cpp/src/speculative_decoding/speculative_decoding_impl.hpp b/src/cpp/src/speculative_decoding/speculative_decoding_impl.hpp
index b84b5b8590..7475d9d766 100644
--- a/src/cpp/src/speculative_decoding/speculative_decoding_impl.hpp
+++ b/src/cpp/src/speculative_decoding/speculative_decoding_impl.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/speculative_decoding/speculative_decoding_metrics.cpp b/src/cpp/src/speculative_decoding/speculative_decoding_metrics.cpp
index 4e5602482a..2357b99fd7 100644
--- a/src/cpp/src/speculative_decoding/speculative_decoding_metrics.cpp
+++ b/src/cpp/src/speculative_decoding/speculative_decoding_metrics.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/cpp/src/speculative_decoding/speculative_decoding_metrics.hpp b/src/cpp/src/speculative_decoding/speculative_decoding_metrics.hpp
index 0d9173b99f..d4a14a1692 100644
--- a/src/cpp/src/speculative_decoding/speculative_decoding_metrics.hpp
+++ b/src/cpp/src/speculative_decoding/speculative_decoding_metrics.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/speculative_decoding/update_request_structs.hpp b/src/cpp/src/speculative_decoding/update_request_structs.hpp
index bd9bb3b67e..68f79268f5 100644
--- a/src/cpp/src/speculative_decoding/update_request_structs.hpp
+++ b/src/cpp/src/speculative_decoding/update_request_structs.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/synchronized_queue.hpp b/src/cpp/src/synchronized_queue.hpp
index 55649a7032..70883bcae7 100644
--- a/src/cpp/src/synchronized_queue.hpp
+++ b/src/cpp/src/synchronized_queue.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/text_callback_streamer.cpp b/src/cpp/src/text_callback_streamer.cpp
index 5938b55f6c..4c4db4311f 100644
--- a/src/cpp/src/text_callback_streamer.cpp
+++ b/src/cpp/src/text_callback_streamer.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "text_callback_streamer.hpp"
@@ -15,23 +15,36 @@ bool TextCallbackStreamer::put(int64_t token) {
std::stringstream res;
m_tokens_cache.push_back(token);
std::string text = m_tokenizer.decode(m_tokens_cache);
- if (!text.empty() && '\n' == text.back() && text.size() > print_len) {
+ m_decoded_lengths.push_back(text.length());
+
+ if (!text.empty() && '\n' == text.back() && text.size() > m_printed_len) {
// Flush the cache after the new line symbol
- res << std::string_view{text.data() + print_len, text.size() - print_len};
+ res << std::string_view{text.data() + m_printed_len, text.size() - m_printed_len};
m_tokens_cache.clear();
- print_len = 0;
+ m_decoded_lengths.clear();
+ m_printed_len = 0;
return on_finalized_subword_callback(res.str());
}
+ constexpr size_t delay_n_tokens = 3;
+ // In some cases adding the next token can shorten the decoded text,
+ // e.g. when an apostrophe-removing regex fires once new tokens are added.
+ // To handle this, printing of the last few tokens is delayed.
+ if (m_decoded_lengths.size() < delay_n_tokens) {
+ return on_finalized_subword_callback(res.str());
+ }
constexpr char replacement[] = "\xef\xbf\xbd"; // MSVC with /utf-8 fails to compile � directly with newline in string literal error.
if (text.size() >= 3 && text.compare(text.size() - 3, 3, replacement) == 0) {
+ m_decoded_lengths[m_decoded_lengths.size() - 1] = -1;
// Don't print incomplete text
return on_finalized_subword_callback(res.str());
- } else if (text.size() > print_len) {
+ }
+ auto print_until = m_decoded_lengths[m_decoded_lengths.size() - delay_n_tokens];
+ if (print_until != -1 && print_until > m_printed_len) {
 // It is possible to have a shorter text after adding a new token.
 // Print to output only if the text length has increased.
- res << std::string_view{text.data() + print_len, text.size() - print_len} << std::flush;
- print_len = text.size();
+ res << std::string_view{text.data() + m_printed_len, print_until - m_printed_len} << std::flush;
+ m_printed_len = print_until;
}
return on_finalized_subword_callback(res.str());
@@ -40,11 +53,12 @@ bool TextCallbackStreamer::put(int64_t token) {
void TextCallbackStreamer::end() {
std::stringstream res;
std::string text = m_tokenizer.decode(m_tokens_cache);
- if (text.size() <= print_len)
- return ;
- res << std::string_view{text.data() + print_len, text.size() - print_len} << std::flush;
+ if (text.size() <= m_printed_len)
+ return;
+ res << std::string_view{text.data() + m_printed_len, text.size() - m_printed_len} << std::flush;
m_tokens_cache.clear();
- print_len = 0;
+ m_decoded_lengths.clear();
+ m_printed_len = 0;
on_finalized_subword_callback(res.str());
return;
}
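
The rewritten streamer keeps a history of decoded lengths and flushes text with a fixed lag, so a later token that shrinks the decode can never retract characters that were already printed. A minimal standalone sketch of the idea (toy data; the real `put()` above additionally handles newline flushes and the incomplete-UTF-8 replacement character):

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

int main() {
    constexpr size_t delay_n_tokens = 3;
    std::vector<int64_t> decoded_lengths; // decoded text length after each token
    size_t printed_len = 0;
    // Hypothetical decoded text after each of 5 tokens; the 4th token *shortens* the text.
    const std::vector<std::string> decodes = {"He", "Hell", "Hello,", "Hello", "Hello wor"};
    for (const std::string& text : decodes) {
        decoded_lengths.push_back(static_cast<int64_t>(text.size()));
        if (decoded_lengths.size() < delay_n_tokens)
            continue; // not enough lookahead accumulated yet
        // Only print up to the length observed delay_n_tokens steps ago.
        int64_t print_until = decoded_lengths[decoded_lengths.size() - delay_n_tokens];
        if (print_until > static_cast<int64_t>(printed_len)) {
            std::cout << text.substr(printed_len, print_until - printed_len) << std::flush;
            printed_len = print_until;
        }
    }
    std::cout << '\n'; // prints "Hello " -- the transient comma was never emitted
}
```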
diff --git a/src/cpp/src/text_callback_streamer.hpp b/src/cpp/src/text_callback_streamer.hpp
index 6f0872ad1b..2c5fab5700 100644
--- a/src/cpp/src/text_callback_streamer.hpp
+++ b/src/cpp/src/text_callback_streamer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
@@ -21,7 +21,8 @@ class TextCallbackStreamer: public StreamerBase {
protected:
Tokenizer m_tokenizer;
    std::vector<int64_t> m_tokens_cache;
- size_t print_len = 0;
+    std::vector<int64_t> m_decoded_lengths;
+ size_t m_printed_len = 0;
};
} // namespace genai
diff --git a/src/cpp/src/timer.hpp b/src/cpp/src/timer.hpp
index de23e56f60..f389e10d5d 100644
--- a/src/cpp/src/timer.hpp
+++ b/src/cpp/src/timer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/tokenizer.cpp b/src/cpp/src/tokenizer.cpp
index 03e3b0a5d3..b22f103577 100644
--- a/src/cpp/src/tokenizer.cpp
+++ b/src/cpp/src/tokenizer.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
@@ -575,13 +575,18 @@ class Tokenizer::TokenizerImpl {
{"slice", slice_callable},
};
+ std::string result;
try {
- return tpl.RenderAsString(params).value();
+ result = tpl.RenderAsString(params).value();
} catch (const std::exception& error) {
OPENVINO_THROW("Chat template for the current model is not supported by Jinja2Cpp. "
"Please apply template manually to your prompt before calling generate. "
"For example: user{user_prompt}model");
}
+ OPENVINO_ASSERT(!result.empty(), "Applied chat template resulted in an empty string. "
+ "Please check the chat template or apply template manually to your prompt before calling generate."
+ "For example: user{user_prompt}model");
+ return result;
}
void set_chat_template(const std::string& chat_template) {
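
The new assert turns a template that renders to an empty string from a silent failure into a hard error, so a successful return is now always non-empty. A hedged caller-side sketch (model path and message are placeholders):

```cpp
// Sketch: apply_chat_template() either returns a non-empty prompt or throws.
ov::genai::Tokenizer tokenizer("./model_dir"); // placeholder path
ov::genai::ChatHistory history = {{{"role", "user"}, {"content", "Say hi"}}};
try {
    std::string prompt = tokenizer.apply_chat_template(history, /*add_generation_prompt=*/true);
    // prompt is guaranteed non-empty here.
} catch (const ov::Exception&) {
    // Unsupported template or empty render: format the prompt manually instead.
}
```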
diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp
index 9261aa7a4a..5b17b2eacf 100644
--- a/src/cpp/src/utils.cpp
+++ b/src/cpp/src/utils.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "utils.hpp"
diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp
index ad0e1a05d4..ff1aea1ae9 100644
--- a/src/cpp/src/utils.hpp
+++ b/src/cpp/src/utils.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/utils/paged_attention_transformations.cpp b/src/cpp/src/utils/paged_attention_transformations.cpp
index f564be8f19..baef7d8dd6 100644
--- a/src/cpp/src/utils/paged_attention_transformations.cpp
+++ b/src/cpp/src/utils/paged_attention_transformations.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "utils/paged_attention_transformations.hpp"
diff --git a/src/cpp/src/utils/paged_attention_transformations.hpp b/src/cpp/src/utils/paged_attention_transformations.hpp
index 2cb32adcdc..c21b06a6f5 100644
--- a/src/cpp/src/utils/paged_attention_transformations.hpp
+++ b/src/cpp/src/utils/paged_attention_transformations.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/visual_language/clip.cpp b/src/cpp/src/visual_language/clip.cpp
index fba8b10d4f..30a6dff5ae 100644
--- a/src/cpp/src/visual_language/clip.cpp
+++ b/src/cpp/src/visual_language/clip.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
// Based on clip.cpp
diff --git a/src/cpp/src/visual_language/clip.hpp b/src/cpp/src/visual_language/clip.hpp
index 55cf03a49f..4bdb4542d0 100644
--- a/src/cpp/src/visual_language/clip.hpp
+++ b/src/cpp/src/visual_language/clip.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/visual_language/embedding_model.cpp b/src/cpp/src/visual_language/embedding_model.cpp
index a2a9750c33..ebc2f80bbd 100644
--- a/src/cpp/src/visual_language/embedding_model.cpp
+++ b/src/cpp/src/visual_language/embedding_model.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/cpp/src/visual_language/inputs_embedder.cpp b/src/cpp/src/visual_language/inputs_embedder.cpp
index 9f228cafa3..4f3812862c 100644
--- a/src/cpp/src/visual_language/inputs_embedder.cpp
+++ b/src/cpp/src/visual_language/inputs_embedder.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/visual_language/perf_metrics.hpp"
@@ -52,6 +52,12 @@ class InputsEmbedder::IInputsEmbedder {
public:
virtual ov::Tensor get_inputs_embeds(const std::string& prompt, const std::vector<ov::Tensor>& images, ov::genai::VLMPerfMetrics& metrics) = 0;
+ virtual std::pair<ov::Tensor, std::optional<int64_t>> get_position_ids(const size_t inputs_embeds_size, const size_t history_size) {
+ ov::Tensor position_ids = ov::Tensor{ov::element::i64, { 1, inputs_embeds_size }};
+ std::iota(position_ids.data<int64_t>(), position_ids.data<int64_t>() + position_ids.get_size(), history_size);
+ return {position_ids, std::nullopt};
+ }
+
EmbeddingsModel get_embedding_model() const {
return m_embedding;
}
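
The base-class default reproduces the previous pipeline behaviour: one row of sequential positions starting at the history length, with no RoPE correction. A quick illustration:

```cpp
// Sketch: default get_position_ids() output for
// inputs_embeds_size = 4 and history_size = 10 (std::iota from <numeric>).
ov::Tensor position_ids{ov::element::i64, {1, 4}};
std::iota(position_ids.data<int64_t>(), position_ids.data<int64_t>() + 4, int64_t{10});
// position_ids == [10, 11, 12, 13]; the second pair member is std::nullopt,
// so the language model keeps using plain 1-D positions.
```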
@@ -1157,6 +1163,408 @@ class InputsEmbedderInternVLChat : public InputsEmbedder::IInputsEmbedder {
}
};
+class InputsEmbedderQwen2VL : public InputsEmbedder::IInputsEmbedder {
+ // A model that merges image embeddings (hidden states) using rotary_pos_emb and attention_mask.
+ // Inputs:
+ // - hidden_states: [N, embed_dim]
+ // - rotary_pos_emb: [?, 40]
+ // - attention_mask: [1, ?, ?]
+ // Output: [N, hidden_size]
+ ov::InferRequest m_vision_embeddings_merger;
+
+ ov::Tensor m_position_ids;
+ int64_t m_rope_delta = 0;
+
+public:
+ InputsEmbedderQwen2VL(
+ const VLMConfig& vlm_config,
+ const std::filesystem::path& model_dir,
+ const std::string& device,
+ const ov::AnyMap device_config) :
+ IInputsEmbedder(vlm_config, model_dir, device, device_config) {
+ auto compiled_model = utils::singleton_core().compile_model(model_dir / "openvino_vision_embeddings_merger_model.xml", device, device_config);
+ ov::genai::utils::print_compiled_model_properties(compiled_model, "VLM vision embeddings merger model");
+ m_vision_embeddings_merger = compiled_model.create_infer_request();
+ }
+
+ InputsEmbedderQwen2VL(
+ const VLMConfig& vlm_config,
+ const ModelsMap& models_map,
+ const Tokenizer& tokenizer,
+ const std::filesystem::path& config_dir_path,
+ const std::string& device,
+ const ov::AnyMap device_config) :
+ IInputsEmbedder(vlm_config, models_map, tokenizer, config_dir_path, device, device_config) {
+ m_vision_embeddings_merger = utils::singleton_core().compile_model(
+ get_model_weights_pair(models_map, "vision_embeddings_merger").first,
+ get_model_weights_pair(models_map, "vision_embeddings_merger").second,
+ device,
+ device_config
+ ).create_infer_request();
+ }
+
+ virtual ov::Tensor get_inputs_embeds(const std::string& prompt, const std::vector<ov::Tensor>& images, ov::genai::VLMPerfMetrics& metrics) override {
+ std::string formatted_prompt;
+
+ std::vector<ov::Tensor> single_images = to_single_image_tensors(images);
+ std::vector<ov::Tensor> image_embeds;
+ std::vector<std::array<size_t, 3>> images_grid_thw;
+ image_embeds.reserve(single_images.size());
+ images_grid_thw.reserve(single_images.size());
+
+ for (const auto& image : single_images) {
+ EncodedImage encoded_image = m_vision_encoder.encode(image);
+ ov::Tensor single_image_embeds = encoded_image.resized_source;
+ image_embeds.push_back(std::move(single_image_embeds));
+
+ size_t grid_t = 1;
+ size_t grid_h = encoded_image.resized_source_size.height;
+ size_t grid_w = encoded_image.resized_source_size.width;
+ images_grid_thw.push_back({grid_t, grid_h, grid_w});
+
+ size_t merge_length = std::pow(m_vision_encoder.m_processor_config.merge_size, 2);
+ size_t num_image_pad_tokens = grid_t * grid_h * grid_w / merge_length;
+
+ formatted_prompt += m_vlm_config.vision_start_token;
+ for (size_t i = 0; i < num_image_pad_tokens; i++) {
+ formatted_prompt += m_vlm_config.image_pad_token;
+ }
+ formatted_prompt += m_vlm_config.vision_end_token;
+ }
+ formatted_prompt += prompt;
+
+ // Adapted from Qwen/Qwen2-7B-Instruct
+ std::string chat_template_fallback = "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}";
+ ov::Tensor input_ids = get_encoded_input_ids(formatted_prompt, metrics, chat_template_fallback);
+ ov::Tensor text_embeds = m_embedding.infer(input_ids);
+
+ if (images.empty()) {
+ return text_embeds;
+ }
+
+ auto start_tokenizer_time = std::chrono::steady_clock::now();
+ ov::Tensor encoded_vision_start_token = m_tokenizer.encode(m_vlm_config.vision_start_token, ov::genai::add_special_tokens(false)).input_ids;
+ ov::Tensor encoded_image_pad_token = m_tokenizer.encode(m_vlm_config.image_pad_token, ov::genai::add_special_tokens(false)).input_ids;
+ auto end_tokenizer_time = std::chrono::steady_clock::now();
+ OPENVINO_ASSERT(metrics.raw_metrics.tokenization_durations.size() > 0);
+ metrics.raw_metrics.tokenization_durations[metrics.raw_metrics.tokenization_durations.size() - 1] += ov::genai::MicroSeconds(PerfMetrics::get_microsec(end_tokenizer_time - start_tokenizer_time));
+ int64_t vision_start_token_id = encoded_vision_start_token.data<int64_t>()[encoded_vision_start_token.get_size() - 1];
+ int64_t image_pad_token_id = encoded_image_pad_token.data<int64_t>()[encoded_image_pad_token.get_size() - 1];
+
+ m_position_ids = create_position_ids(input_ids, images_grid_thw, vision_start_token_id);
+
+ int64_t position_ids_max_element = *std::max_element(m_position_ids.data<int64_t>(), m_position_ids.data<int64_t>() + m_position_ids.get_size());
+ m_rope_delta = position_ids_max_element + 1 - static_cast<int64_t>(input_ids.get_shape().at(1));
+
+ return merge_text_and_image_embeddings_qwen2vl(input_ids, text_embeds, image_embeds, images_grid_thw, image_pad_token_id);
+ }
+
+ virtual std::pair<ov::Tensor, std::optional<int64_t>> get_position_ids(const size_t inputs_embeds_size, const size_t history_size) override {
+ if (history_size != 0) {
+ ov::Tensor position_ids{ov::element::i64, {3, 1, inputs_embeds_size}};
+ int64_t new_pos_id = static_cast<int64_t>(history_size + m_rope_delta);
+ for (size_t dim = 0; dim < 3; ++dim) {
+ int64_t* pos_data = position_ids.data<int64_t>() + dim * inputs_embeds_size;
+ std::iota(pos_data, pos_data + inputs_embeds_size, new_pos_id);
+ }
+ return {position_ids, m_rope_delta};
+ }
+ return {m_position_ids, m_rope_delta};
+ }
+
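+ // After the multimodal prefill, m_rope_delta records how far the maximum
+ // M-RoPE position diverged from the token count, so each follow-up chunk can
+ // keep using cheap 1-D positions shifted by that delta. Worked example with
+ // toy numbers:
+ //   prefill: 20 input tokens, but image tokens share spatial positions, so
+ //   the largest 3-D position id was only 14
+ //     rope_delta = 14 + 1 - 20 == -5
+ //   first decode step (history_size == 20):
+ //     new_pos_id = 20 + (-5) == 15, written to all three rows
+ //     (temporal/height/width), continuing right after the prefill.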
+ virtual void start_chat(const std::string& system_message) override {
+ IInputsEmbedder::start_chat(system_message);
+ m_position_ids = ov::Tensor();
+ m_rope_delta = 0;
+ }
+
+ virtual void finish_chat() override {
+ IInputsEmbedder::finish_chat();
+ m_position_ids = ov::Tensor();
+ m_rope_delta = 0;
+ }
+protected:
+ ov::Tensor merge_text_and_image_embeddings_qwen2vl(
+ const ov::Tensor& input_ids,
+ const ov::Tensor& text_embeds,
+ const std::vector<ov::Tensor>& image_embeds,
+ const std::vector<std::array<size_t, 3>> images_grid_thw,
+ const int64_t image_pad_token_id
+ ) {
+ // Calculate cumulative sequence lengths for attention mask
+ std::vector<int32_t> cu_seqlens;
+ cu_seqlens.push_back(0);
+ int32_t cumsum = 0;
+ for (const auto& grid_thw : images_grid_thw) {
+ size_t slice_len = grid_thw.at(1) * grid_thw.at(2);
+ for (size_t t = 0; t < grid_thw.at(0); ++t) {
+ cumsum += slice_len;
+ cu_seqlens.push_back(cumsum);
+ }
+ }
+
+ // Create attention mask for vision embeddings merger model
+ size_t hidden_states_size = cumsum;
+ ov::Tensor attention_mask{ov::element::f32, {1, hidden_states_size, hidden_states_size}};
+ float* attention_mask_data = attention_mask.data<float>();
+ std::fill_n(attention_mask_data, attention_mask.get_size(), -std::numeric_limits<float>::infinity());
+
+ for (size_t i = 1; i < cu_seqlens.size(); ++i) {
+ size_t start = cu_seqlens[i-1];
+ size_t end = cu_seqlens[i];
+ for (size_t row = start; row < end; ++row) {
+ for (size_t col = start; col < end; ++col) {
+ attention_mask_data[row * hidden_states_size + col] = 0.0f;
+ }
+ }
+ }
+
+ // Concatenate image embeddings
+ ov::Tensor concatenated_images;
+ if (image_embeds.size() == 1) {
+ concatenated_images = image_embeds.at(0);
+ } else {
+ size_t total_length = 0;
+ for (const auto& embed : image_embeds) {
+ total_length += embed.get_shape().at(0);
+ }
+ size_t hidden_dim = image_embeds.at(0).get_shape().at(1);
+
+ concatenated_images = ov::Tensor(image_embeds.at(0).get_element_type(), {total_length, hidden_dim});
+ float* concat_data = concatenated_images.data<float>();
+
+ size_t offset = 0;
+ for (const auto& embed : image_embeds) {
+ size_t embed_size = embed.get_shape().at(0) * embed.get_shape().at(1);
+ std::memcpy(concat_data + offset, embed.data(), embed.get_byte_size());
+ offset += embed_size;
+ }
+ }
+
+ ov::Tensor rotary_pos_emb = get_rotary_pos_emb(images_grid_thw);
+
+ m_vision_embeddings_merger.set_tensor("hidden_states", concatenated_images);
+ m_vision_embeddings_merger.set_tensor("attention_mask", attention_mask);
+ m_vision_embeddings_merger.set_tensor("rotary_pos_emb", rotary_pos_emb);
+ m_vision_embeddings_merger.infer();
+ ov::Tensor processed_vision_embeds = m_vision_embeddings_merger.get_output_tensor();
+
+ ov::Tensor merged_embeds(text_embeds.get_element_type(), text_embeds.get_shape());
+ std::memcpy(merged_embeds.data(), text_embeds.data(), text_embeds.get_byte_size());
+
+ auto text_embeds_shape = text_embeds.get_shape();
+ size_t batch_size = text_embeds_shape.at(0);
+ size_t seq_length = text_embeds_shape.at(1);
+ size_t hidden_size = text_embeds_shape.at(2);
+
+ const int64_t* input_ids_data = input_ids.data<int64_t>();
+ float* merged_embeds_data = merged_embeds.data<float>();
+ const float* vision_embeds_data = processed_vision_embeds.data<float>();
+
+ size_t vision_embed_idx = 0;
+ for (size_t batch_idx = 0; batch_idx < batch_size; ++batch_idx) {
+ for (size_t seq_idx = 0; seq_idx < seq_length; ++seq_idx) {
+ size_t flat_idx = batch_idx * seq_length + seq_idx;
+ if (input_ids_data[flat_idx] == image_pad_token_id) {
+ std::copy_n(
+ vision_embeds_data + vision_embed_idx * hidden_size,
+ hidden_size,
+ merged_embeds_data + flat_idx * hidden_size
+ );
+ ++vision_embed_idx;
+ }
+ }
+ }
+ return merged_embeds;
+ }
+
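+ // The cumulative lengths above make the merger's attention mask
+ // block-diagonal, so patches attend only within their own image.
+ // Worked example: image 1 has h*w = 4 patch positions, image 2 has h*w = 6
+ // (both with grid_t = 1), so cu_seqlens == {0, 4, 10}. The 10x10 mask is
+ // -inf everywhere except two zero blocks on the diagonal:
+ //   rows/cols [0, 4)  -> image 1 attends to itself
+ //   rows/cols [4, 10) -> image 2 attends to itself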
+ ov::Tensor get_rotary_pos_emb(const std::vector<std::array<size_t, 3>>& grids_thw) {
+ const size_t spatial_merge_size = m_vision_encoder.m_processor_config.merge_size;
+
+ std::vector<std::array<size_t, 2>> all_pos_ids;
+ size_t total_positions = 0;
+ size_t max_grid_size = 0;
+
+ for (const auto& grid_thw : grids_thw) {
+ size_t t = grid_thw.at(0);
+ size_t h = grid_thw.at(1);
+ size_t w = grid_thw.at(2);
+
+ total_positions += t * h * w;
+ max_grid_size = std::max({max_grid_size, h, w});
+
+ // Create height position IDs
+ std::vector<size_t> hpos_ids(h * w);
+ for (size_t hi = 0; hi < h; ++hi) {
+ for (size_t wi = 0; wi < w; ++wi) {
+ size_t idx = hi * w + wi;
+ hpos_ids[idx] = hi;
+ }
+ }
+
+ // Reshape hpos_ids according to spatial merge size
+ std::vector<size_t> reshaped_hpos;
+ size_t h_blocks = h / spatial_merge_size;
+ size_t w_blocks = w / spatial_merge_size;
+ reshaped_hpos.reserve(h * w);
+
+ for (size_t hb = 0; hb < h_blocks; ++hb) {
+ for (size_t wb = 0; wb < w_blocks; ++wb) {
+ for (size_t hs = 0; hs < spatial_merge_size; ++hs) {
+ for (size_t ws = 0; ws < spatial_merge_size; ++ws) {
+ reshaped_hpos.push_back(hb * spatial_merge_size + hs);
+ }
+ }
+ }
+ }
+
+ // Create width position IDs
+ std::vector<size_t> wpos_ids(h * w);
+ for (size_t hi = 0; hi < h; ++hi) {
+ for (size_t wi = 0; wi < w; ++wi) {
+ size_t idx = hi * w + wi;
+ wpos_ids[idx] = wi;
+ }
+ }
+
+ // Reshape wpos_ids according to spatial merge size
+ std::vector<size_t> reshaped_wpos;
+ reshaped_wpos.reserve(h * w);
+
+ for (size_t hb = 0; hb < h_blocks; ++hb) {
+ for (size_t wb = 0; wb < w_blocks; ++wb) {
+ for (size_t hs = 0; hs < spatial_merge_size; ++hs) {
+ for (size_t ws = 0; ws < spatial_merge_size; ++ws) {
+ reshaped_wpos.push_back(wb * spatial_merge_size + ws);
+ }
+ }
+ }
+ }
+
+ // Stack and repeat for each t
+ for (size_t i = 0; i < t; ++i) {
+ for (size_t j = 0; j < reshaped_hpos.size(); ++j) {
+ all_pos_ids.push_back({reshaped_hpos[j], reshaped_wpos[j]});
+ }
+ }
+ }
+
+ // Calculate rotary embeddings for max_grid_size
+ const size_t dim = 1280 / 16 / 2; // config.vision_config.embed_dim / self.config.vision_config.num_heads / 2
+ const float theta = 10000.0f;
+
+ std::vector<float> inv_freq(dim / 2);
+ for (size_t i = 0; i < dim / 2; ++i) {
+ inv_freq[i] = 1.0f / std::pow(theta, static_cast<float>(i) / static_cast<float>(dim / 2));
+ }
+
+ std::vector<std::vector<float>> freqs(max_grid_size);
+ for (size_t i = 0; i < max_grid_size; ++i) {
+ freqs[i].resize(dim / 2);
+ for (size_t j = 0; j < dim / 2; ++j) {
+ freqs[i][j] = static_cast<float>(i) * inv_freq[j];
+ }
+ }
+
+ ov::Tensor rotary_pos_emb(ov::element::f32, {all_pos_ids.size(), dim});
+ float* output_data = rotary_pos_emb.data<float>();
+
+ for (size_t i = 0; i < all_pos_ids.size(); ++i) {
+ const auto& pos = all_pos_ids.at(i);
+ size_t h_idx = pos.at(0);
+ size_t w_idx = pos.at(1);
+ std::copy_n(freqs[h_idx].begin(), dim / 2, output_data + i * dim);
+ std::copy_n(freqs[w_idx].begin(), dim / 2, output_data + i * dim + dim / 2);
+ }
+
+ return rotary_pos_emb;
+ }
+
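+ // The hard-coded dim = 1280 / 16 / 2 = 40 is what makes the merger's
+ // rotary_pos_emb input [?, 40] (see the class comment above): 20 inverse
+ // frequencies each for the height and width coordinates. Condensed sketch of
+ // the table construction (1280/16 is the per-head embed dim assumed from the
+ // vision config):
+ //   inv_freq[i] = 1 / theta^(i / 20), i in [0, 20)
+ //   freqs[p][i] = p * inv_freq[i] for every grid coordinate p
+ //   a patch at grid position (h, w) is embedded as the concatenation
+ //   [freqs[h] | freqs[w]] -> 20 + 20 = 40 values per position.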
+ ov::Tensor create_position_ids(
+ const ov::Tensor& input_ids_tensor,
+ const std::vector<std::array<size_t, 3>>& images_grid_thw,
+ const int64_t vision_start_token_id
+ ) {
+ const size_t spatial_merge_size = m_vision_encoder.m_processor_config.merge_size;
+
+ const int64_t* input_ids = input_ids_tensor.data<int64_t>();
+ size_t batch_size = input_ids_tensor.get_shape().at(0);
+ size_t seq_len = input_ids_tensor.get_shape().at(1);
+
+ std::vector<size_t> vision_start_indices;
+ for (size_t i = 0; i < seq_len; ++i) {
+ if (input_ids[i] == vision_start_token_id) {
+ vision_start_indices.push_back(i);
+ }
+ }
+
+ ov::Tensor position_ids{ov::element::i64, {3, batch_size, seq_len}};
+ int64_t* pos_data = position_ids.data<int64_t>();
+
+ size_t st = 0;
+ int64_t next_pos = 0;
+ size_t grid_idx = 0;
+
+ for (size_t i = 0; i < vision_start_indices.size(); ++i) {
+ size_t ed = vision_start_indices.at(i);
+
+ // Process text tokens before image
+ if (st < ed) {
+ for (size_t pos = st; pos < ed; ++pos) {
+ pos_data[pos] = next_pos; // temporal
+ pos_data[seq_len + pos] = next_pos; // height
+ pos_data[2 * seq_len + pos] = next_pos; // width
+ next_pos++;
+ }
+ }
+
+ // Process image start token
+ pos_data[ed] = next_pos; // temporal
+ pos_data[seq_len + ed] = next_pos; // height
+ pos_data[2 * seq_len + ed] = next_pos; // width
+ next_pos++;
+ ed++;
+
+ // Process image token with grid
+ if (grid_idx < images_grid_thw.size()) {
+ const auto& grid = images_grid_thw.at(grid_idx);
+ size_t llm_grid_h = grid.at(1) / spatial_merge_size;
+ size_t llm_grid_w = grid.at(2) / spatial_merge_size;
+ size_t ed_image = ed + llm_grid_h * llm_grid_w;
+
+ // Fill temporal dimension
+ std::fill_n(pos_data + ed, llm_grid_h * llm_grid_w, next_pos);
+
+ // Fill height and width dimensions
+ int64_t* height_data = pos_data + seq_len + ed;
+ int64_t* width_data = pos_data + 2 * seq_len + ed;
+ for (size_t h = 0; h < llm_grid_h; ++h) {
+ std::fill_n(height_data + h * llm_grid_w, llm_grid_w, next_pos + h);
+ for (size_t w = 0; w < llm_grid_w; ++w) {
+ width_data[h * llm_grid_w + w] = next_pos + w;
+ }
+ }
+
+ next_pos += std::max(llm_grid_h, llm_grid_w);
+ st = ed_image;
+ grid_idx++;
+ }
+ }
+
+ // Process remaining text tokens
+ if (st < seq_len) {
+ for (size_t pos = st; pos < seq_len; ++pos) {
+ pos_data[pos] = next_pos; // temporal
+ pos_data[seq_len + pos] = next_pos; // height
+ pos_data[2 * seq_len + pos] = next_pos; // width
+ next_pos++;
+ }
+ }
+
+ return position_ids;
+ }
+};
+
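+// A worked example of the 3-D position layout create_position_ids() produces,
+// for a toy sequence of two text tokens, a vision-start token, a 2x2 merged
+// image grid and one trailing text token:
+//
+//   sequence:  [T0, T1, <|vision_start|>, I0, I1, I2, I3, T2]
+//   (llm_grid_h = llm_grid_w = 2 -> 4 merged image tokens)
+//
+//   index:     0  1  2  3  4  5  6  7
+//   temporal:  0  1  2  3  3  3  3  5
+//   height:    0  1  2  3  3  4  4  5
+//   width:     0  1  2  3  4  3  4  5
+//
+// Text advances all three rows together; the image shares one temporal
+// position while height/width enumerate the 2x2 grid; the trailing text token
+// resumes at next_pos = 3 + max(llm_grid_h, llm_grid_w) = 5.
+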
InputsEmbedder::InputsEmbedder(const VLMConfig& vlm_config,
const std::filesystem::path& model_dir,
const std::string& device,
@@ -1169,6 +1577,8 @@ InputsEmbedder::InputsEmbedder(const VLMConfig& vlm_config,
m_impl = std::make_shared(vlm_config, model_dir, device, device_config);
} else if (vlm_config.model_type == VLMModelType::INTERNVL_CHAT) {
m_impl = std::make_shared(vlm_config, model_dir, device, device_config);
+ } else if (vlm_config.model_type == VLMModelType::QWEN2_VL) {
+ m_impl = std::make_shared(vlm_config, model_dir, device, device_config);
} else {
OPENVINO_THROW("Unsupported model type in VLM InputsEmbedder class. Please, create feature request on new model support");
}
@@ -1188,6 +1598,8 @@ InputsEmbedder::InputsEmbedder(const VLMConfig& vlm_config,
m_impl = std::make_shared(vlm_config, models_map, tokenizer, config_dir_path, device, device_config);
} else if (vlm_config.model_type == VLMModelType::INTERNVL_CHAT) {
m_impl = std::make_shared(vlm_config, models_map, tokenizer, config_dir_path, device, device_config);
+ } else if (vlm_config.model_type == VLMModelType::QWEN2_VL) {
+ m_impl = std::make_shared(vlm_config, models_map, tokenizer, config_dir_path, device, device_config);
} else {
OPENVINO_THROW("Unsupported model type in VLM InputsEmbedder class. Please, create feature request on new model support");
}
@@ -1197,6 +1609,10 @@ ov::Tensor InputsEmbedder::get_inputs_embeds(const std::string& prompt, const st
return m_impl->get_inputs_embeds(prompt, images, metrics);
}
+std::pair<ov::Tensor, std::optional<int64_t>> InputsEmbedder::get_position_ids(const size_t inputs_embeds_size, const size_t history_size) {
+ return m_impl->get_position_ids(inputs_embeds_size, history_size);
+}
+
EmbeddingsModel InputsEmbedder::get_embedding_model() const {
return m_impl->get_embedding_model();
}
diff --git a/src/cpp/src/visual_language/inputs_embedder.hpp b/src/cpp/src/visual_language/inputs_embedder.hpp
index 1d72b742ab..223d090b22 100644
--- a/src/cpp/src/visual_language/inputs_embedder.hpp
+++ b/src/cpp/src/visual_language/inputs_embedder.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
@@ -34,6 +34,9 @@ class InputsEmbedder {
// compute input embedding for prompt and multiple images
ov::Tensor get_inputs_embeds(const std::string& prompt, const std::vector<ov::Tensor>& images, ov::genai::VLMPerfMetrics& metrics);
+ // compute position ids for language model input
+ std::pair<ov::Tensor, std::optional<int64_t>> get_position_ids(const size_t inputs_embeds_size, const size_t history_size);
+
// returns embedding model which converts token_id(s) to embedding vectors
EmbeddingsModel get_embedding_model() const;
@@ -65,6 +68,7 @@ class InputsEmbedder {
friend class InputsEmbedderLLaVA;
friend class InputsEmbedderLLaVANext;
friend class InputsEmbedderInternVLChat;
+ friend class InputsEmbedderQwen2VL;
};
} // namespace ov::genai
diff --git a/src/cpp/src/visual_language/perf_metrics.cpp b/src/cpp/src/visual_language/perf_metrics.cpp
index a3afb83641..a5894078bf 100644
--- a/src/cpp/src/visual_language/perf_metrics.cpp
+++ b/src/cpp/src/visual_language/perf_metrics.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/visual_language/perf_metrics.hpp"
diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp
index ebc5c3b5dd..95e3064548 100644
--- a/src/cpp/src/visual_language/pipeline.cpp
+++ b/src/cpp/src/visual_language/pipeline.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
@@ -208,8 +208,9 @@ class ov::genai::VLMPipeline::VLMPipelineImpl {
ov::Tensor new_atten_mask = ov::Tensor{ov::element::i64, { 1, history_size + inputs_embeds_size }};
std::fill_n(new_atten_mask.data<int64_t>(), new_atten_mask.get_size(), 1);
- ov::Tensor position_ids = ov::Tensor{ov::element::i64, { 1, inputs_embeds_size }};
- std::iota(position_ids.data<int64_t>(), position_ids.data<int64_t>() + position_ids.get_size(), history_size);
+ ov::Tensor position_ids;
+ std::optional<int64_t> rope_delta;
+ std::tie(position_ids, rope_delta) = m_inputs_embedder->get_position_ids(inputs_embeds_size, history_size);
if (m_sampler.get_seed() != generation_config.rng_seed) {
m_sampler.set_seed(generation_config.rng_seed);
@@ -218,7 +219,7 @@ class ov::genai::VLMPipeline::VLMPipelineImpl {
ov::genai::EncodedResults encoded_result;
std::optional<int64_t> last_disappeared_token;
std::tie(encoded_result, last_disappeared_token) = ov::genai::get_lm_encoded_results(m_language, inputs_embeds, new_atten_mask, streamer_ptr, m_sampler, requests,
- position_ids, m_embedding);
+ position_ids, m_embedding, rope_delta);
auto decode_start_time = std::chrono::steady_clock::now();
VLMDecodedResults decoded;
diff --git a/src/cpp/src/visual_language/processor_config.cpp b/src/cpp/src/visual_language/processor_config.cpp
index fc524fce9c..f790c58912 100644
--- a/src/cpp/src/visual_language/processor_config.cpp
+++ b/src/cpp/src/visual_language/processor_config.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "processor_config.hpp"
@@ -33,7 +33,7 @@ ov::genai::ProcessorConfig::ProcessorConfig(const std::filesystem::path& json_pa
crop_size_height = parsed.at("crop_size").at("height");
crop_size_width = parsed.at("crop_size").at("width");
}
- if (parsed.contains("size")) {
+ if (parsed.contains("size") && parsed.at("size").contains("shortest_edge")) {
size_shortest_edge = parsed.at("size").at("shortest_edge");
}
@@ -41,4 +41,10 @@ ov::genai::ProcessorConfig::ProcessorConfig(const std::filesystem::path& json_pa
if (parsed.contains("image_grid_pinpoints")) {
image_grid_pinpoints = parsed.at("image_grid_pinpoints").get<std::vector<std::vector<size_t>>>();
}
+
+ // Setting qwen2vl config params
+ read_json_param(parsed, "min_pixels", min_pixels);
+ read_json_param(parsed, "max_pixels", max_pixels);
+ read_json_param(parsed, "temporal_patch_size", temporal_patch_size);
+ read_json_param(parsed, "merge_size", merge_size);
}
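
read_json_param (a helper assumed from this codebase's JSON utilities) overwrites the target only when the key is present, so preprocessor configs exported for other models keep the compiled-in Qwen2-VL defaults. A hedged sketch of that behaviour:

```cpp
// Assumes the read_json_param helper declared in src/cpp/src/json_utils.hpp.
nlohmann::json parsed = nlohmann::json::parse(R"({"merge_size": 2})");
size_t merge_size = 4, min_pixels = 3136;
ov::genai::utils::read_json_param(parsed, "merge_size", merge_size); // key present -> set to 2
ov::genai::utils::read_json_param(parsed, "min_pixels", min_pixels); // key absent  -> stays 3136
```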
diff --git a/src/cpp/src/visual_language/processor_config.hpp b/src/cpp/src/visual_language/processor_config.hpp
index 83cf9870a3..1d40e091a9 100644
--- a/src/cpp/src/visual_language/processor_config.hpp
+++ b/src/cpp/src/visual_language/processor_config.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
@@ -45,6 +45,12 @@ class ProcessorConfig {
// llava-next specific config params
std::vector<std::vector<size_t>> image_grid_pinpoints{{336, 672}, {672, 336}, {672, 672}, {1008, 336}, {336, 1008}};
+ // qwen2vl specific params
+ size_t min_pixels = 3136;
+ size_t max_pixels = 12845056;
+ size_t temporal_patch_size = 2;
+ size_t merge_size = 2;
+
/// @brief Default constructor
ProcessorConfig() = default;
/// @brief Construct ProcessorConfig from values in json_path.
diff --git a/src/cpp/src/visual_language/vision_encoder.cpp b/src/cpp/src/visual_language/vision_encoder.cpp
index 9f8f9b0498..4a5179fdd0 100644
--- a/src/cpp/src/visual_language/vision_encoder.cpp
+++ b/src/cpp/src/visual_language/vision_encoder.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "vision_encoder.hpp"
@@ -644,6 +644,158 @@ ov::Tensor get_pixel_values_internvl(const ov::Tensor& image, const ProcessorCon
}
return output_tensor;
}
+
+ImageSize smart_resize_qwen2vl(size_t height, size_t width, size_t factor, size_t min_pixels, size_t max_pixels) {
+ if (height < factor || width < factor) {
+ OPENVINO_THROW("Height or width must be larger than factor");
+ }
+ if (std::max(height, width) / std::min(height, width) > 200) {
+ OPENVINO_THROW("Absolute aspect ratio must be smaller than 200");
+ }
+
+ size_t h_bar = std::round(static_cast<double>(height) / factor) * factor;
+ size_t w_bar = std::round(static_cast<double>(width) / factor) * factor;
+
+ if (h_bar * w_bar > max_pixels) {
+ double beta = std::sqrt((height * width) / static_cast<double>(max_pixels));
+ h_bar = std::floor(height / beta / factor) * factor;
+ w_bar = std::floor(width / beta / factor) * factor;
+ } else if (h_bar * w_bar < min_pixels) {
+ double beta = std::sqrt(min_pixels / static_cast<double>(height * width));
+ h_bar = std::ceil(height * beta / factor) * factor;
+ w_bar = std::ceil(width * beta / factor) * factor;
+ }
+
+ return ImageSize{h_bar, w_bar};
+}
+
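+// Worked example with the default pixel bounds, assuming the usual
+// patch_size = 14 so factor = 14 * 2 = 28:
+//   height = 531, width = 800
+//     h_bar = round(531 / 28.0) * 28 = 19 * 28 = 532
+//     w_bar = round(800 / 28.0) * 28 = 29 * 28 = 812
+//     532 * 812 = 431984 lies inside [3136, 12845056] -> no second rescale
+//   result: 532 x 812, i.e. a 38 x 58 grid of 14-pixel patches.
+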
+ov::Tensor reshape_image_patches_qwen2vl(
+ const ov::Tensor& patches,
+ const size_t grid_t,
+ const size_t grid_h,
+ const size_t grid_w,
+ const size_t channel,
+ const size_t temporal_patch_size,
+ const size_t patch_size,
+ const size_t spatial_merge_size
+) {
+ ov::Shape output_shape{
+ grid_t,
+ temporal_patch_size,
+ channel,
+ grid_h / spatial_merge_size,
+ spatial_merge_size,
+ patch_size,
+ grid_w / spatial_merge_size,
+ spatial_merge_size,
+ patch_size
+ };
+
+ ov::Tensor reshaped_patches(patches.get_element_type(), output_shape);
+
+ const float* input_data = patches.data<float>();
+ float* output_data = reshaped_patches.data<float>();
+
+ size_t input_idx = 0;
+
+ for (size_t gt = 0; gt < output_shape.at(0); ++gt) {
+ for (size_t tp = 0; tp < output_shape.at(1); ++tp) {
+ for (size_t c = 0; c < output_shape.at(2); ++c) {
+ for (size_t gh = 0; gh < output_shape.at(3); ++gh) {
+ for (size_t ms1 = 0; ms1 < output_shape.at(4); ++ms1) {
+ for (size_t p1 = 0; p1 < output_shape.at(5); ++p1) {
+ for (size_t gw = 0; gw < output_shape.at(6); ++gw) {
+ for (size_t ms2 = 0; ms2 < output_shape.at(7); ++ms2) {
+ for (size_t p2 = 0; p2 < output_shape.at(8); ++p2) {
+ size_t output_idx = gt;
+ output_idx = output_idx * output_shape.at(1) + tp;
+ output_idx = output_idx * output_shape.at(2) + c;
+ output_idx = output_idx * output_shape.at(3) + gh;
+ output_idx = output_idx * output_shape.at(4) + ms1;
+ output_idx = output_idx * output_shape.at(5) + p1;
+ output_idx = output_idx * output_shape.at(6) + gw;
+ output_idx = output_idx * output_shape.at(7) + ms2;
+ output_idx = output_idx * output_shape.at(8) + p2;
+
+ output_data[output_idx] = input_data[input_idx];
+ input_idx++;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return reshaped_patches;
+}
+
+ov::Tensor transpose_image_patches_qwen2vl(const ov::Tensor& reshaped_patches) {
+ // Input dimensions order: [0,1,2,3,4,5,6,7,8]
+ // Output dimensions order: [0,3,6,4,7,2,1,5,8]
+ auto input_shape = reshaped_patches.get_shape();
+
+ ov::Shape output_shape = {
+ input_shape.at(0), // grid_t
+ input_shape.at(3), // grid_h / spatial_merge_size
+ input_shape.at(6), // grid_w / spatial_merge_size
+ input_shape.at(4), // spatial_merge_size
+ input_shape.at(7), // spatial_merge_size
+ input_shape.at(2), // channel
+ input_shape.at(1), // temporal_patch_size
+ input_shape.at(5), // patch_size
+ input_shape.at(8) // patch_size
+ };
+
+ ov::Tensor transposed_patches(reshaped_patches.get_element_type(), output_shape);
+
+ const float* src = reshaped_patches.data<float>();
+ float* dst = transposed_patches.data<float>();
+
+ size_t shape_size = input_shape.size();
+ std::vector<size_t> input_strides(shape_size);
+ std::vector<size_t> output_strides(shape_size);
+
+ input_strides[shape_size - 1] = 1;
+ output_strides[shape_size - 1] = 1;
+ for(int i = 7; i >= 0; i--) {
+ input_strides[i] = input_strides[i+1] * input_shape[i+1];
+ output_strides[i] = output_strides[i+1] * output_shape[i+1];
+ }
+
+ size_t total_elements = reshaped_patches.get_size();
+ for(size_t idx = 0; idx < total_elements; idx++) {
+ size_t remaining = idx;
+ std::vector<size_t> input_indices(shape_size);
+ for (size_t i = 0; i < shape_size; i++) {
+ input_indices[i] = remaining / input_strides[i];
+ remaining %= input_strides[i];
+ }
+
+ std::vector<size_t> output_indices = {
+ input_indices.at(0),
+ input_indices.at(3),
+ input_indices.at(6),
+ input_indices.at(4),
+ input_indices.at(7),
+ input_indices.at(2),
+ input_indices.at(1),
+ input_indices.at(5),
+ input_indices.at(8)
+ };
+
+ size_t dst_idx = 0;
+ for (size_t i = 0; i < shape_size; i++) {
+ dst_idx += output_indices[i] * output_strides[i];
+ }
+
+ dst[dst_idx] = src[idx];
+ }
+
+ return transposed_patches;
+}
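+// Sanity check of the stride-based transpose: the permutation
+// [0,3,6,4,7,2,1,5,8] moves an input element at indices (a,b,c,d,e,f,g,h,i)
+// to output indices (a,d,g,e,h,c,b,f,i). With input shape
+// {1, 2, 3, 2, 2, 14, 2, 2, 14} (grid_t, tp, channel, gh, ms, patch, gw, ms, patch)
+// the output shape becomes {1, 2, 2, 2, 2, 3, 2, 14, 14}
+// (grid_t, gh, gw, ms, ms, channel, tp, patch, patch), which flattens
+// row-major to [grid_t*grid_h*grid_w, channel*tp*patch*patch] = [16, 1176],
+// exactly the layout encode_qwen2vl() expects.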
}
VisionEncoder::VisionEncoder(const std::filesystem::path& model_dir, const VLMModelType model_type, const std::string& device, const ov::AnyMap device_config) :
@@ -678,8 +830,10 @@ EncodedImage VisionEncoder::encode(const ov::Tensor& image, const ProcessorConfi
return encode_llava(image, config);
} else if (model_type == VLMModelType::LLAVA_NEXT) {
return encode_llava_next(image, config);
- } else if (model_type == VLMModelType::INTERNVL_CHAT) {
+ } else if (model_type == VLMModelType::INTERNVL_CHAT) {
return encode_internvl(image, config);
+ } else if (model_type == VLMModelType::QWEN2_VL) {
+ return encode_qwen2vl(image, config);
} else {
OPENVINO_THROW("Unsupported type of VisionEncoder");
}
@@ -753,3 +907,74 @@ EncodedImage VisionEncoder::encode_internvl(const ov::Tensor& image, const Proce
return {std::move(image_features), resized_source_size};
}
+
+EncodedImage VisionEncoder::encode_qwen2vl(const ov::Tensor& image, const ProcessorConfig& config) {
+ ov::Shape image_shape = image.get_shape();
+ auto original_height = image_shape.at(1);
+ auto original_width = image_shape.at(2);
+
+ ImageSize target_image_size = smart_resize_qwen2vl(
+ original_height,
+ original_width,
+ config.patch_size * config.merge_size,
+ config.min_pixels,
+ config.max_pixels
+ );
+
+ clip_image_u8 input_image = tensor_to_clip_image_u8(image);
+ clip_image_u8 resized_image;
+ bicubic_resize(input_image, resized_image, target_image_size.width, target_image_size.height);
+
+ clip_ctx ctx;
+ std::copy(config.image_mean.begin(), config.image_mean.end(), ctx.image_mean);
+ std::copy(config.image_std.begin(), config.image_std.end(), ctx.image_std);
+ clip_image_f32 normalized_image = clip_image_preprocess(ctx, resized_image);
+
+ ov::Tensor patches = clip_image_f32_to_tensor(normalized_image);
+
+ // For single patch tile it to match temporal_patch_size
+ if (patches.get_shape().at(0) == 1) {
+ auto orig_shape = patches.get_shape();
+ ov::Tensor tiled_patches(patches.get_element_type(),
+ {config.temporal_patch_size, orig_shape.at(1), orig_shape.at(2), orig_shape.at(3)});
+
+ for (size_t i = 0; i < config.temporal_patch_size; i++) {
+ std::memcpy(
+ tiled_patches.data<float>() + i * patches.get_byte_size() / sizeof(float),
+ patches.data(),
+ patches.get_byte_size()
+ );
+ }
+ patches = std::move(tiled_patches);
+ }
+
+ auto patches_shape = patches.get_shape();
+ size_t channel = patches_shape.at(1);
+
+ size_t grid_t = patches_shape.at(0) / config.temporal_patch_size;
+ size_t grid_h = target_image_size.height / config.patch_size;
+ size_t grid_w = target_image_size.width / config.patch_size;
+
+ ov::Tensor reshaped_patches = reshape_image_patches_qwen2vl(
+ patches, grid_t, grid_h, grid_w, channel, config.temporal_patch_size, config.patch_size, config.merge_size
+ );
+ ov::Tensor transposed_patches = transpose_image_patches_qwen2vl(reshaped_patches);
+
+ ov::Shape flattened_patches_shape{
+ grid_t * grid_h * grid_w,
+ channel * config.temporal_patch_size * config.patch_size * config.patch_size
+ };
+ ov::Tensor flattened_patches(transposed_patches.get_element_type(), flattened_patches_shape);
+ std::memcpy(flattened_patches.data(), transposed_patches.data(), transposed_patches.get_byte_size());
+
+ m_vision_encoder.set_tensor("hidden_states", flattened_patches);
+ m_vision_encoder.infer();
+
+ const ov::Tensor& infer_output = m_vision_encoder.get_output_tensor();
+ ov::Tensor image_features(infer_output.get_element_type(), infer_output.get_shape());
+ std::memcpy(image_features.data(), infer_output.data(), infer_output.get_byte_size());
+
+ ImageSize resized_source_size{grid_h, grid_w};
+
+ return {std::move(image_features), resized_source_size};
+}
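+// End-to-end shape walk for encode_qwen2vl, continuing the 532 x 812 resize
+// example above (patch_size = 14 assumed; defaults temporal_patch_size = 2,
+// merge_size = 2):
+//   single image, normalized:      [1, 3, 532, 812]
+//   tiled to temporal_patch_size:  [2, 3, 532, 812]
+//   grid_t = 2 / 2 = 1, grid_h = 532 / 14 = 38, grid_w = 812 / 14 = 58
+//   flattened_patches:             [1 * 38 * 58, 3 * 2 * 14 * 14] = [2204, 1176]
+//   vision encoder output:         [2204, embed_dim]
+//   embeddings merger output:      [2204 / merge_size^2, hidden_size] = [551, hidden_size]
+//   -> 551 <|image_pad|> tokens are inserted into the prompt for this image.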
diff --git a/src/cpp/src/visual_language/vision_encoder.hpp b/src/cpp/src/visual_language/vision_encoder.hpp
index d94a898353..e725c06bf4 100644
--- a/src/cpp/src/visual_language/vision_encoder.hpp
+++ b/src/cpp/src/visual_language/vision_encoder.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
@@ -158,5 +158,9 @@ class VisionEncoder {
EncodedImage encode_internvl(
const ov::Tensor& image, const ProcessorConfig& config
);
+
+ EncodedImage encode_qwen2vl(
+ const ov::Tensor& image, const ProcessorConfig& config
+ );
};
}
diff --git a/src/cpp/src/visual_language/vlm_config.cpp b/src/cpp/src/visual_language/vlm_config.cpp
index c711998128..6eab781fc0 100644
--- a/src/cpp/src/visual_language/vlm_config.cpp
+++ b/src/cpp/src/visual_language/vlm_config.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "vlm_config.hpp"
diff --git a/src/cpp/src/visual_language/vlm_config.hpp b/src/cpp/src/visual_language/vlm_config.hpp
index c126d5495e..c70c757707 100644
--- a/src/cpp/src/visual_language/vlm_config.hpp
+++ b/src/cpp/src/visual_language/vlm_config.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
@@ -55,6 +55,13 @@ class VLMConfig {
/// @brief A string token denoting end of image embeddings for InternVL2 model.
std::string image_end_token = "";
+ /// @brief A string token denoting start of vision embeddings for Qwen2VL model.
+ std::string vision_start_token = "<|vision_start|>";
+ /// @brief A placeholder for image embeddings in text for Qwen2VL model.
+ std::string image_pad_token = "<|image_pad|>";
+ /// @brief A string token denoting end of vision embeddings for Qwen2VL model.
+ std::string vision_end_token = "<|vision_end|>";
+
/// @brief Default constructor.
VLMConfig() = default;
/// @brief Construct VLMConfig from values in json_path.
diff --git a/src/cpp/src/visual_language/vlm_model_type.hpp b/src/cpp/src/visual_language/vlm_model_type.hpp
index e4b5e823b6..6f554fbf98 100644
--- a/src/cpp/src/visual_language/vlm_model_type.hpp
+++ b/src/cpp/src/visual_language/vlm_model_type.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
@@ -16,6 +16,7 @@ enum class VLMModelType {
LLAVA,
LLAVA_NEXT,
INTERNVL_CHAT,
+ QWEN2_VL,
};
inline VLMModelType to_vlm_model_type(const std::string& value) {
@@ -23,7 +24,8 @@ inline VLMModelType to_vlm_model_type(const std::string& value) {
{"minicpmv", VLMModelType::MINICPM},
{"llava", VLMModelType::LLAVA},
{"llava_next", VLMModelType::LLAVA_NEXT},
- {"internvl_chat", VLMModelType::INTERNVL_CHAT}
+ {"internvl_chat", VLMModelType::INTERNVL_CHAT},
+ {"qwen2_vl", VLMModelType::QWEN2_VL}
};
auto it = model_types_map.find(value);
diff --git a/src/cpp/src/whisper/context_tokens.cpp b/src/cpp/src/whisper/context_tokens.cpp
index 75ee442551..d39ba7cc91 100644
--- a/src/cpp/src/whisper/context_tokens.cpp
+++ b/src/cpp/src/whisper/context_tokens.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "context_tokens.hpp"
diff --git a/src/cpp/src/whisper/context_tokens.hpp b/src/cpp/src/whisper/context_tokens.hpp
index 0042ba8136..422e7124e4 100644
--- a/src/cpp/src/whisper/context_tokens.hpp
+++ b/src/cpp/src/whisper/context_tokens.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/whisper/logit_processor.cpp b/src/cpp/src/whisper/logit_processor.cpp
index d3d9552f57..92ae39bd4c 100644
--- a/src/cpp/src/whisper/logit_processor.cpp
+++ b/src/cpp/src/whisper/logit_processor.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/cpp/src/whisper/logit_processor.hpp b/src/cpp/src/whisper/logit_processor.hpp
index ee51f905b5..2dd52a7905 100644
--- a/src/cpp/src/whisper/logit_processor.hpp
+++ b/src/cpp/src/whisper/logit_processor.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/whisper/perf_metrics.cpp b/src/cpp/src/whisper/perf_metrics.cpp
index 3947ef616e..4d11acf266 100644
--- a/src/cpp/src/whisper/perf_metrics.cpp
+++ b/src/cpp/src/whisper/perf_metrics.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/whisper_pipeline.hpp"
diff --git a/src/cpp/src/whisper/streamer.cpp b/src/cpp/src/whisper/streamer.cpp
index 4cfce54442..cf84a0b9b2 100644
--- a/src/cpp/src/whisper/streamer.cpp
+++ b/src/cpp/src/whisper/streamer.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "streamer.hpp"
diff --git a/src/cpp/src/whisper/streamer.hpp b/src/cpp/src/whisper/streamer.hpp
index 26c948e16d..df81b03f20 100644
--- a/src/cpp/src/whisper/streamer.hpp
+++ b/src/cpp/src/whisper/streamer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/whisper/timestamps.cpp b/src/cpp/src/whisper/timestamps.cpp
index 41b9fce59f..f2fadafb3b 100644
--- a/src/cpp/src/whisper/timestamps.cpp
+++ b/src/cpp/src/whisper/timestamps.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "timestamps.hpp"
diff --git a/src/cpp/src/whisper/timestamps.hpp b/src/cpp/src/whisper/timestamps.hpp
index ac43f6109a..0dcb6e6ef5 100644
--- a/src/cpp/src/whisper/timestamps.hpp
+++ b/src/cpp/src/whisper/timestamps.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/whisper/whisper.cpp b/src/cpp/src/whisper/whisper.cpp
index 3ab873609d..0f523e3bee 100644
--- a/src/cpp/src/whisper/whisper.cpp
+++ b/src/cpp/src/whisper/whisper.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "whisper.hpp"
diff --git a/src/cpp/src/whisper/whisper.hpp b/src/cpp/src/whisper/whisper.hpp
index fbdf56d171..aed6487160 100644
--- a/src/cpp/src/whisper/whisper.hpp
+++ b/src/cpp/src/whisper/whisper.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/whisper/whisper_config.cpp b/src/cpp/src/whisper/whisper_config.cpp
index ab34063184..2818430aa6 100644
--- a/src/cpp/src/whisper/whisper_config.cpp
+++ b/src/cpp/src/whisper/whisper_config.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "whisper_config.hpp"
diff --git a/src/cpp/src/whisper/whisper_config.hpp b/src/cpp/src/whisper/whisper_config.hpp
index 8e67c3b5ab..e0f23aee04 100644
--- a/src/cpp/src/whisper/whisper_config.hpp
+++ b/src/cpp/src/whisper/whisper_config.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/whisper/whisper_feature_extractor.cpp b/src/cpp/src/whisper/whisper_feature_extractor.cpp
index 04070404c0..4a2fbe5a9b 100644
--- a/src/cpp/src/whisper/whisper_feature_extractor.cpp
+++ b/src/cpp/src/whisper/whisper_feature_extractor.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#ifdef _WIN32
diff --git a/src/cpp/src/whisper/whisper_feature_extractor.hpp b/src/cpp/src/whisper/whisper_feature_extractor.hpp
index 9197cc32ea..9f01aa0320 100644
--- a/src/cpp/src/whisper/whisper_feature_extractor.hpp
+++ b/src/cpp/src/whisper/whisper_feature_extractor.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/whisper/whisper_models.hpp b/src/cpp/src/whisper/whisper_models.hpp
index 9a915e92f4..b2edfcccfe 100644
--- a/src/cpp/src/whisper/whisper_models.hpp
+++ b/src/cpp/src/whisper/whisper_models.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
diff --git a/src/cpp/src/whisper_generation_config.cpp b/src/cpp/src/whisper_generation_config.cpp
index beb663caaf..1cc79ab0e6 100644
--- a/src/cpp/src/whisper_generation_config.cpp
+++ b/src/cpp/src/whisper_generation_config.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/whisper_generation_config.hpp"
diff --git a/src/cpp/src/whisper_pipeline.cpp b/src/cpp/src/whisper_pipeline.cpp
index ffd792c889..70e3950536 100644
--- a/src/cpp/src/whisper_pipeline.cpp
+++ b/src/cpp/src/whisper_pipeline.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "openvino/genai/whisper_pipeline.hpp"
diff --git a/src/cpp/src/whisper_pipeline_static.cpp b/src/cpp/src/whisper_pipeline_static.cpp
index 91de478b1c..551774ec59 100644
--- a/src/cpp/src/whisper_pipeline_static.cpp
+++ b/src/cpp/src/whisper_pipeline_static.cpp
@@ -347,7 +347,7 @@ void add_attention_mask_input(std::shared_ptr<ov::Model> model) {
using namespace ov::op;
class AttentionMaskInput : public ov::pass::MatcherPass {
public:
- OPENVINO_RTTI("AttentionMaskInput");
+ OPENVINO_MATCHER_PASS_RTTI("AttentionMaskInput");
AttentionMaskInput(std::shared_ptr<ov::Model> model) {
auto range = wrap_type<v4::Range>();
diff --git a/src/docs/BUILD.md b/src/docs/BUILD.md
index 77657620a0..4cda924fd2 100644
--- a/src/docs/BUILD.md
+++ b/src/docs/BUILD.md
@@ -191,9 +191,11 @@ The path to the openvino install directory is referred as `<INSTALL_DIR>` throughout the document.
```
4. Build the wheel in the `dist` directory:
```sh
- python -m pip wheel . -w dist/ --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
+ python -m pip wheel . -w dist/ --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
```
> **NOTE**: You need to build ABI-compatible OpenVINO and OpenVINO Tokenizers for Ubuntu instead of downloading them from PyPI. See [OpenVINO™ GenAI Dependencies](../README.md#openvino-genai-dependencies) for the explanation.
+
### Install OpenVINO GenAI From Source
1. Clone OpenVINO GenAI repository and init submodules:
diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
index 1293246260..bb832d5acd 100644
--- a/src/python/CMakeLists.txt
+++ b/src/python/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2024 Intel Corporation
+# Copyright (C) 2018-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
diff --git a/src/python/py_continuous_batching_pipeline.cpp b/src/python/py_continuous_batching_pipeline.cpp
index d6888c4547..6df67b3a39 100644
--- a/src/python/py_continuous_batching_pipeline.cpp
+++ b/src/python/py_continuous_batching_pipeline.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/python/py_generation_config.cpp b/src/python/py_generation_config.cpp
index a97a43fc5c..e2a6d7062c 100644
--- a/src/python/py_generation_config.cpp
+++ b/src/python/py_generation_config.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/python/py_image_generation_models.cpp b/src/python/py_image_generation_models.cpp
index 75be28233f..877977926e 100644
--- a/src/python/py_image_generation_models.cpp
+++ b/src/python/py_image_generation_models.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/python/py_image_generation_pipelines.cpp b/src/python/py_image_generation_pipelines.cpp
index c246557a97..b011aee878 100644
--- a/src/python/py_image_generation_pipelines.cpp
+++ b/src/python/py_image_generation_pipelines.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/python/py_llm_pipeline.cpp b/src/python/py_llm_pipeline.cpp
index 2d5e5e6abc..969059ee16 100644
--- a/src/python/py_llm_pipeline.cpp
+++ b/src/python/py_llm_pipeline.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/python/py_lora_adapter.cpp b/src/python/py_lora_adapter.cpp
index 7f98b67064..ddaf6e2081 100644
--- a/src/python/py_lora_adapter.cpp
+++ b/src/python/py_lora_adapter.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp
index f8e577d5c8..8b8bd831b0 100644
--- a/src/python/py_openvino_genai.cpp
+++ b/src/python/py_openvino_genai.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/python/py_perf_metrics.cpp b/src/python/py_perf_metrics.cpp
index 1d17e34905..17e71150ac 100644
--- a/src/python/py_perf_metrics.cpp
+++ b/src/python/py_perf_metrics.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/python/py_tokenizer.cpp b/src/python/py_tokenizer.cpp
index db4643a65c..0dd9f3d715 100644
--- a/src/python/py_tokenizer.cpp
+++ b/src/python/py_tokenizer.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/src/python/py_utils.cpp b/src/python/py_utils.cpp
index bad702d54d..90cce498cd 100644
--- a/src/python/py_utils.cpp
+++ b/src/python/py_utils.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "py_utils.hpp"
@@ -72,6 +72,8 @@ ov::Any py_object_to_any(const py::object& py_obj, std::string property_name) {
std::set<std::string> any_map_properties = {
"GENERATE_CONFIG",
"PREFILL_CONFIG",
+ "++GENERATE_CONFIG",
+ "++PREFILL_CONFIG"
};
py::object float_32_type = py::module_::import("numpy").attr("float32");
diff --git a/src/python/py_utils.hpp b/src/python/py_utils.hpp
index 9d78ab0930..c3dbdf6aee 100644
--- a/src/python/py_utils.hpp
+++ b/src/python/py_utils.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#define PYBIND11_DETAILED_ERROR_MESSAGES
diff --git a/src/python/py_vlm_pipeline.cpp b/src/python/py_vlm_pipeline.cpp
index e076d87f11..f070c32629 100644
--- a/src/python/py_vlm_pipeline.cpp
+++ b/src/python/py_vlm_pipeline.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
diff --git a/src/python/py_whisper_pipeline.cpp b/src/python/py_whisper_pipeline.cpp
index d290612ed6..55728409e8 100644
--- a/src/python/py_whisper_pipeline.cpp
+++ b/src/python/py_whisper_pipeline.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt
index 5880010841..9e557aeabc 100644
--- a/tests/cpp/CMakeLists.txt
+++ b/tests/cpp/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2024 Intel Corporation
+# Copyright (C) 2018-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
diff --git a/tests/cpp/block_allocator.cpp b/tests/cpp/block_allocator.cpp
index a89233b2a9..982d38a0eb 100644
--- a/tests/cpp/block_allocator.cpp
+++ b/tests/cpp/block_allocator.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2024 Intel Corporation
+// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/tests/cpp/block_hash_store.cpp b/tests/cpp/block_hash_store.cpp
index 6827a332b8..61c706cfb6 100644
--- a/tests/cpp/block_hash_store.cpp
+++ b/tests/cpp/block_hash_store.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2024 Intel Corporation
+// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
diff --git a/tests/cpp/block_manager.cpp b/tests/cpp/block_manager.cpp
index 670a0dffe7..9501eaefb5 100644
--- a/tests/cpp/block_manager.cpp
+++ b/tests/cpp/block_manager.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2024 Intel Corporation
+// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
diff --git a/tests/cpp/cache_manager.cpp b/tests/cpp/cache_manager.cpp
index 095cc39f09..7d855ded12 100644
--- a/tests/cpp/cache_manager.cpp
+++ b/tests/cpp/cache_manager.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2024 Intel Corporation
+// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
diff --git a/tests/cpp/device_config.cpp b/tests/cpp/device_config.cpp
index 973648f637..93e06f02e7 100644
--- a/tests/cpp/device_config.cpp
+++ b/tests/cpp/device_config.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2024 Intel Corporation
+// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
diff --git a/tests/cpp/scheduler.cpp b/tests/cpp/scheduler.cpp
index 23594adf50..ecd53fa665 100644
--- a/tests/cpp/scheduler.cpp
+++ b/tests/cpp/scheduler.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2024 Intel Corporation
+// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
diff --git a/tests/cpp/utils.cpp b/tests/cpp/utils.cpp
index d00edae6fb..14b110b993 100644
--- a/tests/cpp/utils.cpp
+++ b/tests/cpp/utils.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2024 Intel Corporation
+// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
diff --git a/tests/python_tests/common.py b/tests/python_tests/common.py
index 2fca58a959..cbed01e20f 100644
--- a/tests/python_tests/common.py
+++ b/tests/python_tests/common.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2024 Intel Corporation
+# Copyright (C) 2018-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
@@ -7,10 +7,10 @@
from optimum.intel import OVModelForCausalLM
from pathlib import Path
-from openvino_genai import ContinuousBatchingPipeline, LLMPipeline, SchedulerConfig, GenerationResult, GenerationConfig, DecodedResults, StopCriteria
+from openvino_genai import ContinuousBatchingPipeline, LLMPipeline, SchedulerConfig, GenerationResult, GenerationConfig, DecodedResults, StopCriteria, StreamerBase
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import GenerationConfig as HFGenerationConfig
-from typing import List, Tuple
+from typing import List, Tuple, Callable
TESTS_ROOT = Path(__file__).parent
@@ -325,19 +325,50 @@ def get_default_properties():
}
+class StreamerWithResults:
+    # A streamer that accumulates generated subwords so the streamed text
+    # can be compared with the results returned from generate().
+    results: List[str]
+ def __init__(self):
+ self.results = []
+
+    def accumulate(self, subword: str) -> bool:
+ self.results.append(subword)
+ return False
+
+ def get_results(self) -> List[GenerationResult]:
+ streaming_result = GenerationResult()
+ streaming_result.m_generation_ids = [''.join(self.results)]
+ return [streaming_result]
+
+ def reset(self):
+ self.results = []
+
+
def run_llm_pipeline(
models_path : Path,
prompts: List[str],
generation_config : GenerationConfig,
- use_cb : bool = False
+ use_cb : bool = False,
+    streamer: StreamerWithResults | Callable | StreamerBase | None = None
) -> List[GenerationResult]:
properties = get_default_properties()
if use_cb:
properties['scheduler_config'] = SchedulerConfig()
-
ov_pipe = LLMPipeline(models_path, device='CPU', **properties)
-
- generate_outputs : DecodedResults = ov_pipe.generate(inputs=prompts, generation_config=generation_config)
+
+ if streamer is None and not (generation_config.is_beam_search() or generation_config.num_return_sequences > 1) and len(prompts) == 1:
+        # Streaming can be used only with a single prompt and without beam search or multiple return sequences.
+ streamer = StreamerWithResults()
+ if isinstance(streamer, StreamerWithResults):
+        # Clear previously accumulated strings to avoid side effects between runs.
+ streamer.reset()
+
+ generate_outputs : DecodedResults = ov_pipe.generate(
+ inputs=prompts,
+ generation_config=generation_config,
+ streamer=streamer.accumulate if isinstance(streamer, StreamerWithResults) else streamer
+ )
index = 0
generation_results = []
@@ -355,6 +386,9 @@ def run_llm_pipeline(
del ov_pipe
shutil.rmtree(models_path)
+
+ if isinstance(streamer, StreamerWithResults):
+ compare_generation_results(prompts, generation_results, streamer.get_results(), generation_config)
return generation_results
@@ -410,9 +444,14 @@ def convert_models(opt_model : OVModelForCausalLM, hf_tokenizer : AutoTokenizer,
tokenizer, detokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=True)
serialize(tokenizer, models_path / "openvino_tokenizer.xml")
serialize(detokenizer, models_path / "openvino_detokenizer.xml")
+
-
-def run_llm_pipeline_with_ref(model_id: str, prompts: List[str], generation_config: GenerationConfig | dict, tmp_path: Path, use_cb : bool = False):
+def run_llm_pipeline_with_ref(model_id: str,
+ prompts: List[str],
+ generation_config: GenerationConfig | dict,
+ tmp_path: Path,
+ use_cb : bool = False,
+                          streamer: StreamerWithResults | Callable | StreamerBase | None = None):
models_path : Path = tmp_path / model_id
opt_model, hf_tokenizer = get_hugging_face_models(model_id)
@@ -421,7 +460,7 @@ def run_llm_pipeline_with_ref(model_id: str, prompts: List[str], generation_conf
convert_models(opt_model, hf_tokenizer, models_path)
- ov_results = run_llm_pipeline(models_path, prompts, generation_config, use_cb)
+    ov_results = run_llm_pipeline(models_path, prompts, generation_config, use_cb, streamer=streamer)  # run_llm_pipeline unwraps StreamerWithResults itself
hf_results = run_hugging_face(opt_model, hf_tokenizer, prompts, generation_config)
compare_generation_results(prompts, hf_results, ov_results, generation_config)
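
The streamer plumbing above reduces to an accumulate-and-compare pattern. Below is a minimal, hedged sketch of that pattern in isolation, assuming an already-constructed `LLMPipeline` named `ov_pipe` and a `GenerationConfig` named `config` (illustrative names, not part of the patch):

```python
# Hedged sketch of the accumulate-and-compare pattern used in run_llm_pipeline.
streamer = StreamerWithResults()
streamer.reset()  # clear leftovers from any previous run

decoded = ov_pipe.generate(
    inputs=["What is OpenVINO?"],
    generation_config=config,
    streamer=streamer.accumulate,  # invoked once per decoded subword
)

# The concatenated streamed subwords must equal the final decoded text.
assert decoded.texts[0] == ''.join(streamer.results)
```
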
diff --git a/tests/python_tests/requirements.txt b/tests/python_tests/requirements.txt
index 78cacd61ae..8d0f6d4e30 100644
--- a/tests/python_tests/requirements.txt
+++ b/tests/python_tests/requirements.txt
@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cpu
-diffusers==0.32.1
+diffusers==0.32.2
optimum-intel @ git+https://github.com/eaidova/optimum-intel@ea/stateful_seq2seq
numpy<2.0.0; platform_system == "Darwin" and platform_machine == "x86_64"
onnx==1.17.0
diff --git a/tests/python_tests/test_continuous_batching.py b/tests/python_tests/test_continuous_batching.py
index d7ce0b1ece..e8081f30e6 100644
--- a/tests/python_tests/test_continuous_batching.py
+++ b/tests/python_tests/test_continuous_batching.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2024 Intel Corporation
+# Copyright (C) 2018-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
@@ -117,7 +117,7 @@ def test_cb_streamer_vs_return_vs_stateful(prompt):
@pytest.mark.parametrize("model_descr", get_chat_models_list())
@pytest.mark.precommit
def test_chat_scenario_vs_stateful(model_descr, generation_config_kwargs: Dict):
- model_id, models_path, hf_tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))
+ model_id, models_path, hf_tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1]))
cb_pipe = get_continuous_batching(models_path)
ov_pipe.start_chat()
diff --git a/tests/python_tests/test_generation_config.py b/tests/python_tests/test_generation_config.py
index 0a42685b05..1b3c44b69e 100644
--- a/tests/python_tests/test_generation_config.py
+++ b/tests/python_tests/test_generation_config.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from openvino_genai import GenerationConfig
diff --git a/tests/python_tests/test_kv_cache_eviction.py b/tests/python_tests/test_kv_cache_eviction.py
index 428047ea28..43fd82412f 100644
--- a/tests/python_tests/test_kv_cache_eviction.py
+++ b/tests/python_tests/test_kv_cache_eviction.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from dataclasses import dataclass
from pathlib import Path
diff --git a/tests/python_tests/test_llm_pipeline.py b/tests/python_tests/test_llm_pipeline.py
index 031c42a1dc..8968f2a083 100644
--- a/tests/python_tests/test_llm_pipeline.py
+++ b/tests/python_tests/test_llm_pipeline.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import openvino_genai as ov_genai
@@ -129,7 +129,7 @@ def test_chat_scenario(model_descr, generation_config_kwargs: Dict):
chat_history_hf = []
chat_history_ov = []
- model_id, path, tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))
+ model_id, path, tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1]))
ov_generation_config = GenerationConfig(**generation_config_kwargs)
hf_generation_config = convert_to_hf(opt_model.generation_config, ov_generation_config)
@@ -363,7 +363,7 @@ def test_unicode_pybind_decoding_one_string_streamer():
ov_pipe = read_model((model_id, path))[4]
res_str = []
ov_pipe.generate(",", max_new_tokens=4, streamer=lambda x: res_str.append(x))
- assert '�' == res_str[-1]
+ assert '�' == ''.join(res_str)[-1]
#
# Perf metrics
diff --git a/tests/python_tests/test_llm_pipeline_static.py b/tests/python_tests/test_llm_pipeline_static.py
index d2d3673356..ae5c475fd9 100644
--- a/tests/python_tests/test_llm_pipeline_static.py
+++ b/tests/python_tests/test_llm_pipeline_static.py
@@ -17,7 +17,6 @@
from common import \
get_greedy, \
get_greedy_with_penalties, \
- get_multinomial_temperature, \
get_multinomial_all_parameters, \
get_multinomial_temperature_and_presence_penalty, \
get_beam_search
@@ -34,7 +33,7 @@
'NPUW_ONLINE_PIPELINE': 'NONE',
'PREFILL_CONFIG': { },
'GENERATE_CONFIG': { }
- }
+ } | get_default_properties()
def generate_chat_history(model_path, device, pipeline_config, questions):
@@ -56,7 +55,7 @@ def test_generation_compare_with_stateful(generation_config):
prompt = 'What is OpenVINO?'
model_path = read_model(get_models_list()[0])[1]
- stateful_pipe = ov_genai.LLMPipeline(model_path, "CPU")
+ stateful_pipe = ov_genai.LLMPipeline(model_path, "CPU", **get_default_properties())
ref_out = stateful_pipe.generate(prompt, generation_config)
static_pipe = ov_genai.LLMPipeline(model_path, "NPU", **common_config)
diff --git a/tests/python_tests/test_sampling.py b/tests/python_tests/test_sampling.py
index 004d4f9d9d..7a3aced29a 100644
--- a/tests/python_tests/test_sampling.py
+++ b/tests/python_tests/test_sampling.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2024 Intel Corporation
+# Copyright (C) 2018-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import sys
@@ -9,7 +9,7 @@
from openvino_genai import GenerationConfig, StopCriteria
from typing import List, TypedDict
-from common import get_hugging_face_models, convert_models, run_llm_pipeline_with_ref, run_llm_pipeline
+from common import get_hugging_face_models, convert_models, run_llm_pipeline_with_ref, run_llm_pipeline, compare_generation_results, StreamerWithResults
@pytest.mark.precommit
@@ -58,13 +58,27 @@ def test_stop_strings(tmp_path, generation_config):
@pytest.mark.precommit
@pytest.mark.parametrize("generation_config",
[dict(max_new_tokens=30),
- dict(max_new_tokens=30, repetition_penalty=2.0),],
- ids=["basic",
- "repetition_penalty",])
-def test_greedy(tmp_path, generation_config):
- prompts = [ "What is OpenVINO?" ]
+ dict(max_new_tokens=30, repetition_penalty=2.0),
+ dict(max_new_tokens=300)],
+ ids=["basic", "repetition_penalty", "long_max_new_tokens"])
+@pytest.mark.parametrize("prompt", [
+ 'What is OpenVINO?',
+ 'table is made of',
+ 'The Sun is yellow because',
+ '你好! 你好嗎?',
+ 'I have an interview about product speccing with the company Weekend Health. Give me an example of a question they might ask with regards about a new feature'
+])
+@pytest.mark.parametrize("use_cb", [True, False])
+def test_greedy(tmp_path, generation_config, prompt, use_cb):
model_id : str = "katuni4ka/tiny-random-phi3"
- run_llm_pipeline_with_ref(model_id, prompts, generation_config, tmp_path)
+ if sys.platform.startswith('win') and prompt.startswith('你'):
+        pytest.skip("For an unknown reason, this prompt fails on Windows")
+
+ run_llm_pipeline_with_ref(model_id=model_id,
+ prompts=[prompt],
+ generation_config=generation_config,
+ tmp_path=tmp_path,
+ use_cb=use_cb)
@pytest.mark.precommit
@@ -319,13 +333,14 @@ def test_multinomial_sampling_against_reference(tmp_path, test_struct: RandomSam
prompts = test_struct.prompts
generation_config.rng_seed = 0
generation_configs = generation_config
+
model_id : str = "facebook/opt-125m"
model, hf_tokenizer = get_hugging_face_models(model_id)
models_path : Path = tmp_path / model_id
convert_models(model, hf_tokenizer, models_path)
- # run multinomial without comparison with reference
+ # Run multinomial without comparison with HF reference.
_ = run_llm_pipeline(models_path, prompts, generation_configs)
# Reference comparison is not performed as sampling results are non-deterministic.
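
Worth noting: the stacked `@pytest.mark.parametrize` decorators on the greedy test above form a Cartesian product, so the single test function now covers 3 generation configs × 5 prompts × 2 `use_cb` values = 30 cases. A small, self-contained illustration of the mechanism (hypothetical names):

```python
# Stacked parametrize decorators multiply: 2 x 3 = 6 invocations here.
import pytest

@pytest.mark.parametrize("use_cb", [True, False])
@pytest.mark.parametrize("cfg", ["basic", "repetition_penalty", "long_max_new_tokens"])
def test_matrix(cfg, use_cb):
    # Each (cfg, use_cb) pair runs as its own test case.
    assert isinstance(cfg, str) and isinstance(use_cb, bool)
```
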
diff --git a/tests/python_tests/test_tokenizer.py b/tests/python_tests/test_tokenizer.py
index 8129298763..d2faa227d5 100644
--- a/tests/python_tests/test_tokenizer.py
+++ b/tests/python_tests/test_tokenizer.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
@@ -181,12 +181,18 @@ def test_apply_chat_template(model_tmp_path, chat_config: Tuple[str, Dict]):
print(f'ov_genai out: {ov_full_history_str}')
assert ov_full_history_str == hf_full_history_str
+    # Test that an exception is thrown when the rendered chat template is empty.
+ # Example: Qwen2-VL chat template
+ chat_template_for_empty_output = "{% if messages is string %}{{ messages }}{% else %}{% for content in messages %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}"
+ with pytest.raises(Exception):
+ ov_tokenizer.apply_chat_template(conversation, chat_template=chat_template_for_empty_output)
+
@pytest.mark.precommit
@pytest.mark.nightly
def test_set_chat_template():
model_descr = get_chat_models_list()[0]
- model_id, path, hf_tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))
+ model_id, path, hf_tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1]))
prompt = "how are you?"
dummy_conversation = [
@@ -217,24 +223,36 @@ def test_set_chat_template():
]
@pytest.mark.precommit
@pytest.mark.nightly
-@pytest.mark.parametrize("add_special_tokens", [True, False])
-@pytest.mark.parametrize("skip_special_tokens", [True, False])
@pytest.mark.parametrize("prompt", prompts)
-def test_encode_decode_with_special_tokens_option(add_special_tokens, skip_special_tokens, prompt):
+def test_encode_decode_with_special_tokens_option(prompt):
import numpy as np
- model_descr = get_chat_models_list()[0]
- model_id, path, hf_tokenizer, model_opt, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))
+ model_descr = get_models_list()[0]
+ model_id, path, hf_tokenizer, model_opt, ov_pipe = read_model((model_descr[0], model_descr[1]))
ov_tokenzier = ov_pipe.get_tokenizer()
# Calling encode with 'add_special_tokens' will set state flag.
- ov_res = ov_tokenzier.encode(prompt, add_special_tokens=add_special_tokens).input_ids.data
- hf_res = hf_tokenizer(prompt, return_tensors="np", add_special_tokens=add_special_tokens)["input_ids"]
- assert np.all(ov_res == hf_res)
+ ov_res_add_spec = ov_tokenzier.encode(prompt, add_special_tokens=True).input_ids.data
+ ov_res_no_spec = ov_tokenzier.encode(prompt, add_special_tokens=False).input_ids.data
+ hf_res_add_spec = hf_tokenizer(prompt, return_tensors="np", add_special_tokens=True)["input_ids"]
+ hf_res_no_spec = hf_tokenizer(prompt, return_tensors="np", add_special_tokens=False)["input_ids"]
+ assert np.all(ov_res_add_spec == hf_res_add_spec)
+ assert np.all(ov_res_no_spec == hf_res_no_spec)
+
+    # Check that the add_special_tokens flag actually made a difference.
+ assert ov_res_add_spec.size != ov_res_no_spec.size
+ assert hf_res_add_spec.size != hf_res_no_spec.size
# Decode with 'skip_special_tokens'
- decoded_genai = ov_tokenzier.decode(ov_res, skip_special_tokens=skip_special_tokens)[0]
- decoded_hf = hf_tokenizer.decode(hf_res[0], skip_special_tokens=skip_special_tokens)
- assert decoded_genai == decoded_hf
+ decoded_genai_skip_spec = ov_tokenzier.decode(hf_res_add_spec, skip_special_tokens=True)[0]
+ decoded_genai_no_skip = ov_tokenzier.decode(hf_res_add_spec, skip_special_tokens=False)[0]
+ decoded_hf_skip_spec = hf_tokenizer.decode(hf_res_add_spec[0], skip_special_tokens=True)
+ decoded_hf_no_skip = hf_tokenizer.decode(hf_res_add_spec[0], skip_special_tokens=False)
+ assert decoded_genai_skip_spec == decoded_hf_skip_spec
+ assert decoded_genai_no_skip == decoded_hf_no_skip
+
+    # Check that skip_special_tokens actually made a difference.
+ assert decoded_genai_skip_spec != decoded_genai_no_skip
+ assert decoded_hf_skip_spec != decoded_hf_no_skip
@pytest.mark.precommit
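
The refactored tokenizer test now also proves the flags are not no-ops. A hedged, standalone sketch of the round trip it verifies, assuming `ov_tokenizer`, `hf_tokenizer`, and `prompt` are already constructed (illustrative names):

```python
import numpy as np

# Encode with and without special tokens; both backends must agree.
ov_with = ov_tokenizer.encode(prompt, add_special_tokens=True).input_ids.data
ov_without = ov_tokenizer.encode(prompt, add_special_tokens=False).input_ids.data
hf_with = hf_tokenizer(prompt, return_tensors="np", add_special_tokens=True)["input_ids"]
assert np.all(ov_with == hf_with)

# The flag must actually change the token count (e.g. BOS/EOS added).
assert ov_with.size != ov_without.size

# Decoding with skip_special_tokens must visibly differ from keeping them.
kept = ov_tokenizer.decode(hf_with, skip_special_tokens=False)[0]
skipped = ov_tokenizer.decode(hf_with, skip_special_tokens=True)[0]
assert kept != skipped
```
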
diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py
index e6f897bcef..b413b6cf1d 100644
--- a/tests/python_tests/test_vlm_pipeline.py
+++ b/tests/python_tests/test_vlm_pipeline.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2024 Intel Corporation
+# Copyright (C) 2018-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import openvino_tokenizers
@@ -46,6 +46,8 @@ def get_ov_model(cache):
@pytest.mark.nightly
def test_vlm_pipeline(cache):
def streamer(word: str) -> bool:
+ nonlocal result_from_streamer
+ result_from_streamer.append(word)
return False
models_path = get_ov_model(cache)
@@ -59,10 +61,14 @@ def streamer(word: str) -> bool:
ov_pipe = VLMPipeline(models_path, "CPU")
ov_pipe.start_chat()
- ov_pipe.generate(prompts[0], images=images, generation_config=generation_config, streamer=streamer)
+ result_from_streamer = []
+ res = ov_pipe.generate(prompts[0], images=images, generation_config=generation_config, streamer=streamer)
+ assert res.texts[0] == ''.join(result_from_streamer)
for prompt in prompts[1:]:
- ov_pipe.generate(prompt, generation_config=generation_config, streamer=streamer)
+ result_from_streamer = []
+ res = ov_pipe.generate(prompt, generation_config=generation_config, streamer=streamer)
+ assert res.texts[0] == ''.join(result_from_streamer)
ov_pipe.finish_chat()
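
The VLM test above uses a closure-based streamer rather than a class. A hedged sketch of that pattern, assuming `ov_pipe`, `prompt`, `images`, and `generation_config` already exist (names taken from the test, shown here out of context):

```python
# Collect every decoded subword and verify it reconstructs the result.
result_from_streamer = []

def streamer(word: str) -> bool:
    result_from_streamer.append(word)
    return False  # returning False tells the pipeline to keep generating

res = ov_pipe.generate(prompt, images=images,
                       generation_config=generation_config, streamer=streamer)
assert res.texts[0] == ''.join(result_from_streamer)
```
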
diff --git a/tests/python_tests/test_whisper_pipeline.py b/tests/python_tests/test_whisper_pipeline.py
index 06d5e56b3c..4fe239b358 100644
--- a/tests/python_tests/test_whisper_pipeline.py
+++ b/tests/python_tests/test_whisper_pipeline.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import openvino_genai as ov_genai
diff --git a/thirdparty/openvino_tokenizers b/thirdparty/openvino_tokenizers
index d5f0abf827..09c7005e0d 160000
--- a/thirdparty/openvino_tokenizers
+++ b/thirdparty/openvino_tokenizers
@@ -1 +1 @@
-Subproject commit d5f0abf8271f3cd8fc98d747b3e569fbeacca532
+Subproject commit 09c7005e0da46a50cc86b0e6e4ac9b8663a7af70
diff --git a/tools/cacheviz/__init__.py b/tools/cacheviz/__init__.py
index 88b5a71df7..b755d885d3 100644
--- a/tools/cacheviz/__init__.py
+++ b/tools/cacheviz/__init__.py
@@ -1,3 +1,3 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
diff --git a/tools/cacheviz/cacheviz.py b/tools/cacheviz/cacheviz.py
index b1ccf55098..841a5eeb65 100644
--- a/tools/cacheviz/cacheviz.py
+++ b/tools/cacheviz/cacheviz.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""
diff --git a/tools/continuous_batching/CMakeLists.txt b/tools/continuous_batching/CMakeLists.txt
index 284915e995..5aae9983a4 100644
--- a/tools/continuous_batching/CMakeLists.txt
+++ b/tools/continuous_batching/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2024 Intel Corporation
+# Copyright (C) 2018-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
diff --git a/tools/continuous_batching/accuracy/continuous_batching_accuracy.cpp b/tools/continuous_batching/accuracy/continuous_batching_accuracy.cpp
index 8139193779..d644ba9418 100644
--- a/tools/continuous_batching/accuracy/continuous_batching_accuracy.cpp
+++ b/tools/continuous_batching/accuracy/continuous_batching_accuracy.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/tools/continuous_batching/accuracy/continuous_batching_speculative_decoding.cpp b/tools/continuous_batching/accuracy/continuous_batching_speculative_decoding.cpp
index 7112030432..eeb3c0f070 100644
--- a/tools/continuous_batching/accuracy/continuous_batching_speculative_decoding.cpp
+++ b/tools/continuous_batching/accuracy/continuous_batching_speculative_decoding.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/tools/continuous_batching/benchmark/continuous_batching_benchmark.cpp b/tools/continuous_batching/benchmark/continuous_batching_benchmark.cpp
index e0c50cda02..d7cad80fd0 100644
--- a/tools/continuous_batching/benchmark/continuous_batching_benchmark.cpp
+++ b/tools/continuous_batching/benchmark/continuous_batching_benchmark.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include
diff --git a/tools/llm_bench/benchmark.py b/tools/llm_bench/benchmark.py
index 39b6306e7f..b9e038c12e 100644
--- a/tools/llm_bench/benchmark.py
+++ b/tools/llm_bench/benchmark.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import sys
@@ -147,9 +147,9 @@ def get_argprser():
parser.add_argument("--draft_device", required=False, default=None, help="Inference device for Speculative decoding of draft model")
parser.add_argument("--draft_cb_config", required=False, default=None,
help="Path to file with Continuous Batching Scheduler settings or dict for Speculative decoding of draft model")
- parser.add_argument("--num_assistant_tokens", required=False, default=None, help="Config option num_assistant_tokens for Speculative decoding")
+ parser.add_argument("--num_assistant_tokens", required=False, default=None, help="Config option num_assistant_tokens for Speculative decoding", type=int)
parser.add_argument("--assistant_confidence_threshold", required=False, default=None,
- help="Config option assistant_confidence_threshold for Speculative decoding")
+ help="Config option assistant_confidence_threshold for Speculative decoding", type=float)
parser.add_argument(
'--end_token_stopping',
action='store_true',
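
The `type=int` / `type=float` additions matter because argparse hands back raw strings by default, so numeric config fields would otherwise receive `"5"` where they expect `5`. A small self-contained illustration using only the standard library:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--num_assistant_tokens", default=None, type=int)
parser.add_argument("--assistant_confidence_threshold", default=None, type=float)

args = parser.parse_args(["--num_assistant_tokens", "5",
                          "--assistant_confidence_threshold", "0.4"])
assert args.num_assistant_tokens == 5            # int, not the string "5"
assert args.assistant_confidence_threshold == 0.4  # float, not "0.4"
```
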
diff --git a/tools/llm_bench/convert.py b/tools/llm_bench/convert.py
index 49cea02c11..aa9ab504de 100644
--- a/tools/llm_bench/convert.py
+++ b/tools/llm_bench/convert.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import sys
import gc
diff --git a/tools/llm_bench/llm_bench_utils/config_class.py b/tools/llm_bench/llm_bench_utils/config_class.py
index 9c149c98b6..76b56fb4c5 100644
--- a/tools/llm_bench/llm_bench_utils/config_class.py
+++ b/tools/llm_bench/llm_bench_utils/config_class.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM, T5ForConditionalGeneration, BlenderbotForConditionalGeneration, AutoModel
diff --git a/tools/llm_bench/llm_bench_utils/conversion_utils/better_transformer_patch.py b/tools/llm_bench/llm_bench_utils/conversion_utils/better_transformer_patch.py
index 4def9cfa0a..7f77576487 100644
--- a/tools/llm_bench/llm_bench_utils/conversion_utils/better_transformer_patch.py
+++ b/tools/llm_bench/llm_bench_utils/conversion_utils/better_transformer_patch.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import math
diff --git a/tools/llm_bench/llm_bench_utils/conversion_utils/convert_patch.py b/tools/llm_bench/llm_bench_utils/conversion_utils/convert_patch.py
index 8edf785ad6..df5d333e04 100644
--- a/tools/llm_bench/llm_bench_utils/conversion_utils/convert_patch.py
+++ b/tools/llm_bench/llm_bench_utils/conversion_utils/convert_patch.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import torch
diff --git a/tools/llm_bench/llm_bench_utils/conversion_utils/export_configs.py b/tools/llm_bench/llm_bench_utils/conversion_utils/export_configs.py
index cf465b8f53..ba17308a42 100644
--- a/tools/llm_bench/llm_bench_utils/conversion_utils/export_configs.py
+++ b/tools/llm_bench/llm_bench_utils/conversion_utils/export_configs.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from optimum.exporters.onnx.config import TextDecoderOnnxConfig, TextDecoderWithPositionIdsOnnxConfig
from optimum.exporters.tasks import TasksManager
diff --git a/tools/llm_bench/llm_bench_utils/conversion_utils/helpers.py b/tools/llm_bench/llm_bench_utils/conversion_utils/helpers.py
index 5c6e05588e..606d6557dc 100644
--- a/tools/llm_bench/llm_bench_utils/conversion_utils/helpers.py
+++ b/tools/llm_bench/llm_bench_utils/conversion_utils/helpers.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import copy
import json
diff --git a/tools/llm_bench/llm_bench_utils/gen_output_data.py b/tools/llm_bench/llm_bench_utils/gen_output_data.py
index b65e7b5c8c..594903912d 100644
--- a/tools/llm_bench/llm_bench_utils/gen_output_data.py
+++ b/tools/llm_bench/llm_bench_utils/gen_output_data.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
diff --git a/tools/llm_bench/llm_bench_utils/hook_beam_search.py b/tools/llm_bench/llm_bench_utils/hook_beam_search.py
index d933acc3a5..49f9db8236 100644
--- a/tools/llm_bench/llm_bench_utils/hook_beam_search.py
+++ b/tools/llm_bench/llm_bench_utils/hook_beam_search.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# flake8: noqa
import time
diff --git a/tools/llm_bench/llm_bench_utils/hook_common.py b/tools/llm_bench/llm_bench_utils/hook_common.py
index c805680cee..3b0d623737 100644
--- a/tools/llm_bench/llm_bench_utils/hook_common.py
+++ b/tools/llm_bench/llm_bench_utils/hook_common.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# flake8: noqa
import logging as log
diff --git a/tools/llm_bench/llm_bench_utils/hook_greedy_search.py b/tools/llm_bench/llm_bench_utils/hook_greedy_search.py
index 9039a99e69..86a0eec4ad 100644
--- a/tools/llm_bench/llm_bench_utils/hook_greedy_search.py
+++ b/tools/llm_bench/llm_bench_utils/hook_greedy_search.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# flake8: noqa
import time
diff --git a/tools/llm_bench/llm_bench_utils/hook_sample.py b/tools/llm_bench/llm_bench_utils/hook_sample.py
index 22111c1a3f..a1e970bf4a 100644
--- a/tools/llm_bench/llm_bench_utils/hook_sample.py
+++ b/tools/llm_bench/llm_bench_utils/hook_sample.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# flake8: noqa
import time
diff --git a/tools/llm_bench/llm_bench_utils/hook_sample_v43.py b/tools/llm_bench/llm_bench_utils/hook_sample_v43.py
index 7dce578dac..94d0aa8370 100644
--- a/tools/llm_bench/llm_bench_utils/hook_sample_v43.py
+++ b/tools/llm_bench/llm_bench_utils/hook_sample_v43.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# flake8: noqa
import time
diff --git a/tools/llm_bench/llm_bench_utils/hook_sample_v45.py b/tools/llm_bench/llm_bench_utils/hook_sample_v45.py
index 1644c63a4f..2a43717e9b 100644
--- a/tools/llm_bench/llm_bench_utils/hook_sample_v45.py
+++ b/tools/llm_bench/llm_bench_utils/hook_sample_v45.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# flake8: noqa
import time
diff --git a/tools/llm_bench/llm_bench_utils/memory_profile.py b/tools/llm_bench/llm_bench_utils/memory_profile.py
index 25bf33c938..34604f8533 100644
--- a/tools/llm_bench/llm_bench_utils/memory_profile.py
+++ b/tools/llm_bench/llm_bench_utils/memory_profile.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from threading import Event, Thread
import psutil
diff --git a/tools/llm_bench/llm_bench_utils/metrics_print.py b/tools/llm_bench/llm_bench_utils/metrics_print.py
index c1446b8bfc..7a5cf21fa6 100644
--- a/tools/llm_bench/llm_bench_utils/metrics_print.py
+++ b/tools/llm_bench/llm_bench_utils/metrics_print.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import logging as log
diff --git a/tools/llm_bench/llm_bench_utils/model_utils.py b/tools/llm_bench/llm_bench_utils/model_utils.py
index d51bc8edc6..ae61cf96dd 100644
--- a/tools/llm_bench/llm_bench_utils/model_utils.py
+++ b/tools/llm_bench/llm_bench_utils/model_utils.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import os
diff --git a/tools/llm_bench/llm_bench_utils/output_csv.py b/tools/llm_bench/llm_bench_utils/output_csv.py
index 42d75996fd..ea1402f82f 100644
--- a/tools/llm_bench/llm_bench_utils/output_csv.py
+++ b/tools/llm_bench/llm_bench_utils/output_csv.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import csv
import numpy as np
diff --git a/tools/llm_bench/llm_bench_utils/output_file.py b/tools/llm_bench/llm_bench_utils/output_file.py
index 8efbb430a7..06fa46dbd6 100644
--- a/tools/llm_bench/llm_bench_utils/output_file.py
+++ b/tools/llm_bench/llm_bench_utils/output_file.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
diff --git a/tools/llm_bench/llm_bench_utils/ov_model_classes.py b/tools/llm_bench/llm_bench_utils/ov_model_classes.py
index 0ade0f1299..17cfab4e02 100644
--- a/tools/llm_bench/llm_bench_utils/ov_model_classes.py
+++ b/tools/llm_bench/llm_bench_utils/ov_model_classes.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# flake8: noqa
import time
diff --git a/tools/llm_bench/llm_bench_utils/ov_utils.py b/tools/llm_bench/llm_bench_utils/ov_utils.py
index c45190e87a..606a39cecf 100644
--- a/tools/llm_bench/llm_bench_utils/ov_utils.py
+++ b/tools/llm_bench/llm_bench_utils/ov_utils.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from pathlib import Path
from transformers import AutoConfig, AutoProcessor, AutoTokenizer
@@ -243,9 +243,13 @@ def create_genai_text_gen_model(model_path, device, ov_config, **kwargs):
draft_model_path = kwargs.get("draft_model", '')
cb = kwargs.get("use_cb", False)
- if cb or draft_model_path:
+ cb_config = kwargs.get("cb_config")
+ use_streamer_metrics = False
+ if cb or cb_config is not None or draft_model_path:
log.info("Continuous Batching mode activated")
- ov_config["scheduler_config"] = get_scheduler_config_genai(kwargs.get("cb_config"))
+ ov_config["scheduler_config"] = get_scheduler_config_genai(cb_config)
+
+        use_streamer_metrics = not openvino_genai.get_version().startswith("2025.") or bool(draft_model_path)
if draft_model_path:
if not Path(draft_model_path).exists():
@@ -292,7 +296,7 @@ def get_tokens(self):
def get_time_list(self):
return self.token_generation_time
- streamer = TokenStreamer(llm_pipe.get_tokenizer()) if cb or draft_model_path else None
+ streamer = TokenStreamer(llm_pipe.get_tokenizer()) if use_streamer_metrics else None
return llm_pipe, tokenizer, end - start, streamer, True
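
The gating logic above reads as: fall back to a token-counting streamer only when the installed GenAI runtime predates the 2025.x releases, or when speculative decoding with a draft model is active. A hedged restatement of that condition (helper name is illustrative):

```python
import openvino_genai

def needs_streamer_metrics(draft_model_path: str) -> bool:
    # 2025.x runtimes are assumed to report per-token perf metrics natively,
    # so the manual TokenStreamer is only needed on older versions or when
    # a draft model (speculative decoding) is configured.
    native_metrics = openvino_genai.get_version().startswith("2025.")
    return (not native_metrics) or bool(draft_model_path)
```
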
diff --git a/tools/llm_bench/llm_bench_utils/parse_json_data.py b/tools/llm_bench/llm_bench_utils/parse_json_data.py
index 28fbd298cd..37f71d2075 100644
--- a/tools/llm_bench/llm_bench_utils/parse_json_data.py
+++ b/tools/llm_bench/llm_bench_utils/parse_json_data.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
diff --git a/tools/llm_bench/llm_bench_utils/pt_utils.py b/tools/llm_bench/llm_bench_utils/pt_utils.py
index 877c135a3c..a22dec9578 100644
--- a/tools/llm_bench/llm_bench_utils/pt_utils.py
+++ b/tools/llm_bench/llm_bench_utils/pt_utils.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from pathlib import Path
import torch
diff --git a/tools/llm_bench/task/image_generation.py b/tools/llm_bench/task/image_generation.py
index 02229a5284..f2fc518636 100644
--- a/tools/llm_bench/task/image_generation.py
+++ b/tools/llm_bench/task/image_generation.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import time
diff --git a/tools/llm_bench/task/speech_to_text_generation.py b/tools/llm_bench/task/speech_to_text_generation.py
index 15a47a8b6a..cb34a81a2f 100644
--- a/tools/llm_bench/task/speech_to_text_generation.py
+++ b/tools/llm_bench/task/speech_to_text_generation.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import time
diff --git a/tools/llm_bench/task/super_resolution_generation.py b/tools/llm_bench/task/super_resolution_generation.py
index afc41fb627..c2f3cff6e4 100644
--- a/tools/llm_bench/task/super_resolution_generation.py
+++ b/tools/llm_bench/task/super_resolution_generation.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import time
diff --git a/tools/llm_bench/task/text_generation.py b/tools/llm_bench/task/text_generation.py
index c768d427e7..de798f158f 100644
--- a/tools/llm_bench/task/text_generation.py
+++ b/tools/llm_bench/task/text_generation.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import time
@@ -181,14 +181,6 @@ def run_text_generation(input_text, num, model, tokenizer, args, iter_data_list,
log.warning(f"[{num}] Prompt[{prompt_index}]'s md5 {result_md5_list} "
f"is different from md5 of the {num - 1} iteration {prev_md5}")
metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
- if not args.get("use_cb", False):
- if num == 1:
- # if the device is CPU, throw exception
- if args['devices'].lower().startswith('cpu') is True:
- assert (result_md5_list == prev_md5)
- else:
- # throw exception
- assert (result_md5_list == prev_md5)
else:
metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
if bench_hook is not None:
@@ -231,10 +223,10 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
if args.get('draft_model', ''):
config_info = "Speculative decoding config: "
if args.get('num_assistant_tokens', None):
- gen_config.num_assistant_tokens = args['num_assistant_tokens']
+ gen_config.num_assistant_tokens = int(args['num_assistant_tokens'])
config_info += f" num_assistant_tokens {gen_config.num_assistant_tokens}"
if args.get('assistant_confidence_threshold', None):
- gen_config.assistant_confidence_threshold = args['assistant_confidence_threshold']
+ gen_config.assistant_confidence_threshold = float(args['assistant_confidence_threshold'])
config_info += f" assistant_confidence_threshold {gen_config.assistant_confidence_threshold}"
log.info(config_info)
start = time.perf_counter()
@@ -339,14 +331,6 @@ def token_printer():
log.warning(f"[{num}] Prompt[{prompt_index}]'s md5 {result_md5_list} "
f"is different from md5 of the {num - 1} iteration {prev_md5}")
metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
- if not args.get("use_cb", False):
- if num == 1:
- # if the device is CPU, throw exception
- if args['devices'].lower().startswith('cpu') is True:
- assert (result_md5_list == prev_md5)
- else:
- # throw exception
- assert (result_md5_list == prev_md5)
else:
metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
@@ -461,14 +445,6 @@ def run_text_generation_genai_with_stream(input_text, num, model, tokenizer, arg
log.warning(f"[{num}] Prompt[{prompt_index}]'s md5 {result_md5_list} "
f"is different from md5 of the {num - 1} iteration {prev_md5}")
metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
- if not args.get("use_cb", False):
- if num == 1:
- # if the device is CPU, throw exception
- if args['devices'].lower().startswith('cpu') is True:
- assert (result_md5_list == prev_md5)
- else:
- # throw exception
- assert (result_md5_list == prev_md5)
else:
metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0], prompt_idx=prompt_index)
streamer.reset()
diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py
index 068ae0cf60..a5fb0ecc0c 100644
--- a/tools/llm_bench/task/visual_language_generation.py
+++ b/tools/llm_bench/task/visual_language_generation.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import time