diff --git a/samples/python/README.md b/samples/python/README.md
index e6970cc..73bb0b7 100644
--- a/samples/python/README.md
+++ b/samples/python/README.md
@@ -55,5 +55,7 @@ python stable_diffusion_v2_1\stable_diffusion_v2_1.py --prompt "spectacular view
| stable_diffusion_v1_5 | 2.24 | python stable_diffusion_v1_5\stable_diffusion_v1_5.py --prompt "the prompt string ..." |
| stable_diffusion_v2_1 | 2.24 | python stable_diffusion_v2_1\stable_diffusion_v2_1.py --prompt "the prompt string ..." |
| riffusion | 2.24 | python riffusion\riffusion.py --prompt "the prompt string ..." |
+| real_esrgan_x4plus | 2.28 | python real_esrgan_x4plus\real_esrgan_x4plus.py |
+| real_esrgan_general_x4v3 | 2.28 | python real_esrgan_general_x4v3\real_esrgan_general_x4v3.py |

*More models will be supported soon!*
diff --git a/samples/python/aotgan/README.md b/samples/python/aotgan/README.md
index 1b5cf56..4743596 100644
--- a/samples/python/aotgan/README.md
+++ b/samples/python/aotgan/README.md
@@ -9,11 +9,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\aotgan\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\aotgan\qnn\QnnHtp.dll
-C:\ai-hub\aotgan\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\aotgan\qnn\QnnSystem.dll
-C:\ai-hub\aotgan\qnn\libqnnhtpv73.cat
+C:\ai-hub\aotgan\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\aotgan\qai_libs\QnnHtp.dll
+C:\ai-hub\aotgan\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\aotgan\qai_libs\QnnSystem.dll
+C:\ai-hub\aotgan\qai_libs\libqnnhtpv73.cat
```

## aotgan QNN models
diff --git a/samples/python/aotgan/aotgan.py b/samples/python/aotgan/aotgan.py
index 8b1786a..3510ec5 100644
--- a/samples/python/aotgan/aotgan.py
+++ b/samples/python/aotgan/aotgan.py
@@ -70,7 +70,7 @@ def Init():
    global aotgan

    # Config AppBuilder environment.
-    QNNConfig.Config(os.getcwd() + "\\qnn", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
+    QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for AotGan objects.
    aotgan_model = "models\\aotgan.bin"
diff --git a/samples/python/docs/guide.md b/samples/python/docs/guide.md
index 33316b7..afbdce6 100644
--- a/samples/python/docs/guide.md
+++ b/samples/python/docs/guide.md
@@ -17,13 +17,11 @@ C:\ai-hub\model_name\
Set up a new folder called `qai_libs` and copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\model_name\qnn\libqnnhtpv73.cat
-C:\ai-hub\model_name\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\model_name\qnn\QnnCpu.dll
-C:\ai-hub\model_name\qnn\QnnHtp.dll
-C:\ai-hub\model_name\qnn\QnnHtpPrepare.dll
-C:\ai-hub\model_name\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\model_name\qnn\QnnSystem.dll
+C:\ai-hub\model_name\qai_libs\libqnnhtpv73.cat
+C:\ai-hub\model_name\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\model_name\qai_libs\QnnHtp.dll
+C:\ai-hub\model_name\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\model_name\qai_libs\QnnSystem.dll
```

## Prepare the QNN model
@@ -174,7 +172,7 @@ def Init():
    global lamadilated

    # Config AppBuilder environment.
-    QNNConfig.Config(os.getcwd() + "\\qnn", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
+    QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for LamaDilated objects.
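    # (The class name and .bin path below are this guide's running example.
    # Every sample's Init() follows the same shape: point QNNConfig.Config()
    # at the folder holding the QNN runtime libraries, then construct the
    # model object from its .bin path under 'models'.)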
    lamadilated_model = "models\\lama_dilated.bin"
diff --git a/samples/python/fastsam_x/README.md b/samples/python/fastsam_x/README.md
index 39f6ad9..27766bb 100644
--- a/samples/python/fastsam_x/README.md
+++ b/samples/python/fastsam_x/README.md
@@ -9,11 +9,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\unet_segmentation\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\unet_segmentation\qnn\QnnHtp.dll
-C:\ai-hub\unet_segmentation\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\unet_segmentation\qnn\QnnSystem.dll
-C:\ai-hub\unet_segmentation\qnn\libqnnhtpv73.cat
+C:\ai-hub\fastsam_x\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\fastsam_x\qai_libs\QnnHtp.dll
+C:\ai-hub\fastsam_x\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\fastsam_x\qai_libs\QnnSystem.dll
+C:\ai-hub\fastsam_x\qai_libs\libqnnhtpv73.cat
```

## unet_segmentation QNN models
diff --git a/samples/python/fastsam_x/fastsam_x.py b/samples/python/fastsam_x/fastsam_x.py
index cfa6e47..cde1020 100644
--- a/samples/python/fastsam_x/fastsam_x.py
+++ b/samples/python/fastsam_x/fastsam_x.py
@@ -151,7 +151,7 @@ def Init():
    global fastsam

    # Config AppBuilder environment.
-    QNNConfig.Config(os.getcwd() + "\\qnn", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
+    QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for FastSam_x objects.
    fastsam_model = "models\\fastsam_x.bin"
diff --git a/samples/python/inception_v3/README.md b/samples/python/inception_v3/README.md
index 1b0873d..3dcfd95 100644
--- a/samples/python/inception_v3/README.md
+++ b/samples/python/inception_v3/README.md
@@ -10,11 +10,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\inception_v3\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\inception_v3\qnn\QnnHtp.dll
-C:\ai-hub\inception_v3\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\inception_v3\qnn\QnnSystem.dll
-C:\ai-hub\inception_v3\qnn\libqnnhtpv73.cat
+C:\ai-hub\inception_v3\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\inception_v3\qai_libs\QnnHtp.dll
+C:\ai-hub\inception_v3\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\inception_v3\qai_libs\QnnSystem.dll
+C:\ai-hub\inception_v3\qai_libs\libqnnhtpv73.cat
```

## inception_v3 QNN models
diff --git a/samples/python/inception_v3/inception_v3.py b/samples/python/inception_v3/inception_v3.py
index ef93dfa..4fa6e06 100644
--- a/samples/python/inception_v3/inception_v3.py
+++ b/samples/python/inception_v3/inception_v3.py
@@ -50,7 +50,7 @@ def Init():
    global inceptionV3

    # Config AppBuilder environment.
-    QNNConfig.Config(os.getcwd() + "\\qnn", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
+    QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for InceptionV3 objects.
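    # (Note that QNNConfig.Config() resolves qai_libs against os.getcwd(),
    # so each sample script assumes it is launched from its own folder.)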
    inceptionV3_model = "models\\inception_v3.bin"
diff --git a/samples/python/lama_dilated/README.md b/samples/python/lama_dilated/README.md
index 1a78f9d..4a15674 100644
--- a/samples/python/lama_dilated/README.md
+++ b/samples/python/lama_dilated/README.md
@@ -9,11 +9,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\lama_dilated\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\lama_dilated\qnn\QnnHtp.dll
-C:\ai-hub\lama_dilated\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\lama_dilated\qnn\QnnSystem.dll
-C:\ai-hub\lama_dilated\qnn\libqnnhtpv73.cat
+C:\ai-hub\lama_dilated\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\lama_dilated\qai_libs\QnnHtp.dll
+C:\ai-hub\lama_dilated\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\lama_dilated\qai_libs\QnnSystem.dll
+C:\ai-hub\lama_dilated\qai_libs\libqnnhtpv73.cat
```

## lama_dilated QNN models
diff --git a/samples/python/lama_dilated/lama_dilated.py b/samples/python/lama_dilated/lama_dilated.py
index 5629db2..0f4c6da 100644
--- a/samples/python/lama_dilated/lama_dilated.py
+++ b/samples/python/lama_dilated/lama_dilated.py
@@ -70,7 +70,7 @@ def Init():
    global lamadilated

    # Config AppBuilder environment.
-    QNNConfig.Config(os.getcwd() + "\\qnn", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
+    QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for LamaDilated objects.
    lamadilated_model = "models\\lama_dilated.bin"
diff --git a/samples/python/openpose/README.md b/samples/python/openpose/README.md
index bb1e0d0..dd9057e 100644
--- a/samples/python/openpose/README.md
+++ b/samples/python/openpose/README.md
@@ -9,11 +9,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\openpose\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\openpose\qnn\QnnHtp.dll
-C:\ai-hub\openpose\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\openpose\qnn\QnnSystem.dll
-C:\ai-hub\openpose\qnn\libqnnhtpv73.cat
+C:\ai-hub\openpose\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\openpose\qai_libs\QnnHtp.dll
+C:\ai-hub\openpose\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\openpose\qai_libs\QnnSystem.dll
+C:\ai-hub\openpose\qai_libs\libqnnhtpv73.cat
```

## openpose QNN models
diff --git a/samples/python/openpose/openpose.py b/samples/python/openpose/openpose.py
index 6f18cd1..2b4eb06 100644
--- a/samples/python/openpose/openpose.py
+++ b/samples/python/openpose/openpose.py
@@ -427,7 +427,7 @@ def Init():
    global openpose

    # Config AppBuilder environment.
-    QNNConfig.Config(os.getcwd() + "\\qnn", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
+    QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for OpenPose objects.
    openpose_model = "models\\openpose.bin"
diff --git a/samples/python/real_esrgan_general_x4v3/README.md b/samples/python/real_esrgan_general_x4v3/README.md
index 9fcbe4e..7a1bd8f 100644
--- a/samples/python/real_esrgan_general_x4v3/README.md
+++ b/samples/python/real_esrgan_general_x4v3/README.md
@@ -9,11 +9,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\real_esrgan_general_x4v3\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\real_esrgan_general_x4v3\qnn\QnnHtp.dll
-C:\ai-hub\real_esrgan_general_x4v3\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\real_esrgan_general_x4v3\qnn\QnnSystem.dll
-C:\ai-hub\real_esrgan_general_x4v3\qnn\libqnnhtpv73.cat
+C:\ai-hub\real_esrgan_general_x4v3\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\real_esrgan_general_x4v3\qai_libs\QnnHtp.dll
+C:\ai-hub\real_esrgan_general_x4v3\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\real_esrgan_general_x4v3\qai_libs\QnnSystem.dll
+C:\ai-hub\real_esrgan_general_x4v3\qai_libs\libqnnhtpv73.cat
```

## real_esrgan_general_x4v3 QNN models
@@ -25,10 +25,9 @@ You can refer to the links below on how to set up the AI Hub environment and how to use the AI Hub API:
https://aihub.qualcomm.com/get-started
http://app.aihub.qualcomm.com/docs/

-a. Download the latest 'ai-hub-models' code and install it to Python environment:
+a. Install the AI Hub Python packages:
```
-git clone --recursive https://github.com/quic/ai-hub-models.git
-pip install -e .
+pip install qai-hub qai_hub_models
```

b. Use the command below to generate a QNN model that supports 512x512 input resolution:
@@ -45,7 +44,7 @@ Scheduled compile job (j1p86jxog) successfully. To see the status and results:
```
After downloading the model, copy it to the following path:
```
-C:\ai-hub\real_esrgan_general_x4v3\models\real_esrgan_general_x4v3_512.bin
+C:\ai-hub\real_esrgan_general_x4v3\models\real_esrgan_general_x4v3.bin
```

## Run the sample code
diff --git a/samples/python/real_esrgan_general_x4v3/real_esrgan_general_x4v3.py b/samples/python/real_esrgan_general_x4v3/real_esrgan_general_x4v3.py
index acf7ed9..f27e7cb 100644
--- a/samples/python/real_esrgan_general_x4v3/real_esrgan_general_x4v3.py
+++ b/samples/python/real_esrgan_general_x4v3/real_esrgan_general_x4v3.py
@@ -3,10 +3,13 @@
# SPDX-License-Identifier: BSD-3-Clause
# ---------------------------------------------------------------------
+import sys
import os
+sys.path.append(".")
+sys.path.append("..")
+import utils.install as install
import cv2
import numpy as np
-
import torch
import torchvision.transforms as transforms
from PIL import Image
@@ -16,17 +19,29 @@

####################################################################

+MODEL_ID = "mn0j68p8m"
+MODEL_NAME = "real_esrgan_general_x4v3"
+MODEL_HELP_URL = "https://github.com/quic/ai-engine-direct-helper/tree/main/samples/python/" + MODEL_NAME + "#" + MODEL_NAME + "-qnn-models"
+IMAGE_SIZE = 512
+
+####################################################################
+
execution_ws = os.getcwd()
-qnn_dir = execution_ws + "\\qnn"
+qnn_dir = execution_ws + "\\qai_libs"
+
+if MODEL_NAME not in execution_ws:
+    execution_ws = execution_ws + "\\" + MODEL_NAME
+
+model_dir = execution_ws + "\\models"
+model_path = model_dir + "\\" + MODEL_NAME + ".bin"

-image_size = 512
image_buffer = None
realesrgan = None

def preprocess_PIL_image(image: Image) -> torch.Tensor:
    """Convert a PIL image into a pyTorch tensor with range [0, 1] and shape NCHW."""
-    transform = transforms.Compose([transforms.Resize(image_size),  # bgr image
-                                    transforms.CenterCrop(image_size),
+    transform = transforms.Compose([transforms.Resize(IMAGE_SIZE),  # bgr image
+                                    transforms.CenterCrop(IMAGE_SIZE),
                                    transforms.PILToTensor()])
    img: torch.Tensor = transform(image)  # type: ignore
    img = img.float() / 255.0  # int 0 - 255 to float 0.0 - 1.0
@@ -47,15 +62,26 @@ def Inference(self, input_data):
        output_data = super().Inference(input_datas)[0]
        return output_data

+def model_download():
+    ret = True
+
+    desc = f"Downloading {MODEL_NAME} model... "
+    fail = f"\nFailed to download {MODEL_NAME} model. Please prepare the model according to the steps at the link below:\n{MODEL_HELP_URL}"
+    ret = install.download_qai_hubmodel(MODEL_ID, model_path, desc=desc, fail=fail)
+
+    if not ret:
+        exit()
+
def Init():
    global realesrgan

+    model_download()
+
    # Config AppBuilder environment.
    QNNConfig.Config(qnn_dir, Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for RealESRGan objects.
-    realesrgan_model = "models\\real_esrgan_general_x4v3_512.bin"
-    realesrgan = RealESRGan("realesrgan", realesrgan_model)
+    realesrgan = RealESRGan("realesrgan", model_path)

def Inference(input_image_path, output_image_path):
    global image_buffer
@@ -75,11 +101,12 @@ def Inference(input_image_path, output_image_path):
    PerfProfile.RelPerfProfileGlobal()

    output_image = torch.from_numpy(output_image)
-    output_image = output_image.reshape(image_size * 4, image_size * 4, 3)
+    output_image = output_image.reshape(IMAGE_SIZE * 4, IMAGE_SIZE * 4, 3)
    output_image = torch.unsqueeze(output_image, 0)
    output_image = [torch_tensor_to_PIL_image(img) for img in output_image]
    image_buffer = output_image[0]
    image_buffer.save(output_image_path)
+    image_buffer.show()

def Release():
    global realesrgan
@@ -90,7 +117,7 @@

Init()

-Inference("input.png", "output.png")
+Inference(execution_ws + "\\input.png", execution_ws + "\\output.png")

Release()
diff --git a/samples/python/real_esrgan_x4plus/README.md b/samples/python/real_esrgan_x4plus/README.md
index d167469..fb7f500 100644
--- a/samples/python/real_esrgan_x4plus/README.md
+++ b/samples/python/real_esrgan_x4plus/README.md
@@ -9,11 +9,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\real_esrgan_x4plus\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\real_esrgan_x4plus\qnn\QnnHtp.dll
-C:\ai-hub\real_esrgan_x4plus\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\real_esrgan_x4plus\qnn\QnnSystem.dll
-C:\ai-hub\real_esrgan_x4plus\qnn\libqnnhtpv73.cat
+C:\ai-hub\real_esrgan_x4plus\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\real_esrgan_x4plus\qai_libs\QnnHtp.dll
+C:\ai-hub\real_esrgan_x4plus\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\real_esrgan_x4plus\qai_libs\QnnSystem.dll
+C:\ai-hub\real_esrgan_x4plus\qai_libs\libqnnhtpv73.cat
```

## real_esrgan_x4plus QNN models
@@ -25,10 +25,9 @@ You can refer to the links below on how to set up the AI Hub environment and how to use the AI Hub API:
https://aihub.qualcomm.com/get-started
http://app.aihub.qualcomm.com/docs/

-a. Download the latest 'ai-hub-models' code and install it to Python environment:
+a. Install the AI Hub Python packages:
```
-git clone --recursive https://github.com/quic/ai-hub-models.git
-pip install -e .
+pip install qai-hub qai_hub_models
```

b. Use the command below to generate a QNN model that supports 512x512 input resolution:
@@ -45,7 +44,7 @@ Scheduled compile job (j1p86jxog) successfully. To see the status and results:
```
After downloading the model, copy it to the following path:
```
-C:\ai-hub\real_esrgan_x4plus\models\realesrgan_x4_512.bin
+C:\ai-hub\real_esrgan_x4plus\models\real_esrgan_x4plus.bin
```

## Run the sample code
diff --git a/samples/python/real_esrgan_x4plus/real_esrgan_x4plus.py b/samples/python/real_esrgan_x4plus/real_esrgan_x4plus.py
index f09421d..fa02f9a 100644
--- a/samples/python/real_esrgan_x4plus/real_esrgan_x4plus.py
+++ b/samples/python/real_esrgan_x4plus/real_esrgan_x4plus.py
@@ -3,10 +3,13 @@
# SPDX-License-Identifier: BSD-3-Clause
# ---------------------------------------------------------------------
+import sys
import os
+sys.path.append(".")
+sys.path.append("..")
+import utils.install as install
import cv2
import numpy as np
-
import torch
import torchvision.transforms as transforms
from PIL import Image
@@ -16,17 +19,29 @@

####################################################################

+MODEL_ID = "m7qk01okn"
+MODEL_NAME = "real_esrgan_x4plus"
+MODEL_HELP_URL = "https://github.com/quic/ai-engine-direct-helper/tree/main/samples/python/" + MODEL_NAME + "#" + MODEL_NAME + "-qnn-models"
+IMAGE_SIZE = 512
+
+####################################################################
+
execution_ws = os.getcwd()
-qnn_dir = execution_ws + "\\qnn"
+qnn_dir = execution_ws + "\\qai_libs"
+
+if MODEL_NAME not in execution_ws:
+    execution_ws = execution_ws + "\\" + MODEL_NAME
+
+model_dir = execution_ws + "\\models"
+model_path = model_dir + "\\" + MODEL_NAME + ".bin"

-image_size = 512
image_buffer = None
realesrgan = None

def preprocess_PIL_image(image: Image) -> torch.Tensor:
    """Convert a PIL image into a pyTorch tensor with range [0, 1] and shape NCHW."""
-    transform = transforms.Compose([transforms.Resize(image_size),  # bgr image
-                                    transforms.CenterCrop(image_size),
+    transform = transforms.Compose([transforms.Resize(IMAGE_SIZE),  # bgr image
+                                    transforms.CenterCrop(IMAGE_SIZE),
                                    transforms.PILToTensor()])
    img: torch.Tensor = transform(image)  # type: ignore
    img = img.float().unsqueeze(0) / 255.0  # int 0 - 255 to float 0.0 - 1.0
@@ -47,15 +62,26 @@ def Inference(self, input_data):
        output_data = super().Inference(input_datas)[0]
        return output_data

+def model_download():
+    ret = True
+
+    desc = f"Downloading {MODEL_NAME} model... "
+    fail = f"\nFailed to download {MODEL_NAME} model. Please prepare the model according to the steps at the link below:\n{MODEL_HELP_URL}"
+    ret = install.download_qai_hubmodel(MODEL_ID, model_path, desc=desc, fail=fail)
+
+    if not ret:
+        exit()
+
def Init():
    global realesrgan

+    model_download()
+
    # Config AppBuilder environment.
    QNNConfig.Config(qnn_dir, Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for RealESRGan objects.
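    # (model_download() above returns without downloading when the .bin file
    # already exists; otherwise it fetches the model from AI Hub, and on
    # failure it prints MODEL_HELP_URL and exits.)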
-    realesrgan_model = "models\\realesrgan_x4_512.bin"
-    realesrgan = RealESRGan("realesrgan", realesrgan_model)
+    realesrgan = RealESRGan("realesrgan", model_path)

def Inference(input_image_path, output_image_path):
    global image_buffer
@@ -74,11 +100,12 @@ def Inference(input_image_path, output_image_path):
    PerfProfile.RelPerfProfileGlobal()

    output_image = torch.from_numpy(output_image)
-    output_image = output_image.reshape(3, image_size * 4, image_size * 4)
+    output_image = output_image.reshape(3, IMAGE_SIZE * 4, IMAGE_SIZE * 4)
    output_image = torch.unsqueeze(output_image, 0)
    output_image = [torch_tensor_to_PIL_image(img) for img in output_image]
    image_buffer = output_image[0]
    image_buffer.save(output_image_path)
+    image_buffer.show()

def Release():
    global realesrgan
@@ -89,7 +116,7 @@

Init()

-Inference("input.png", "output.png")
+Inference(execution_ws + "\\input.png", execution_ws + "\\output.png")

Release()
diff --git a/samples/python/riffusion/Riffusion.py b/samples/python/riffusion/Riffusion.py
index e33778f..a6def61 100644
--- a/samples/python/riffusion/Riffusion.py
+++ b/samples/python/riffusion/Riffusion.py
@@ -307,7 +307,7 @@ def model_download():
    desc = "Downloading timestep_embedding model... "
    fail = "\nFailed to download timestep_embedding model. Please prepare the timestep_embedding data according to the guide below:\n" + TIMESTEP_HTLP_URL + "\n"
-    ret = install.download_qai_hubmodel(TIMESTEP_EMBEDDING_MODEL_ID, time_embedding_model_path, desc=desc, fail=fail)
+    ret = install.download_qai_hubmodel(TIMESTEP_EMBEDDING_MODEL_ID, time_embedding_model_path, desc=desc, fail=fail, hub_id=install.HUB_ID_T)

    if not ret:
        if not os.path.exists(time_embedding_dir):  # There is no timestep_embedding data, exit process.
diff --git a/samples/python/stable_diffusion_v1_5/stable_diffusion_v1_5.py b/samples/python/stable_diffusion_v1_5/stable_diffusion_v1_5.py
index 4fbc3b4..06d2462 100644
--- a/samples/python/stable_diffusion_v1_5/stable_diffusion_v1_5.py
+++ b/samples/python/stable_diffusion_v1_5/stable_diffusion_v1_5.py
@@ -307,7 +307,7 @@ def model_download():
    desc = "Downloading timestep_embedding model... "
    fail = "\nFailed to download timestep_embedding model. Please prepare the timestep_embedding data according to the guide below:\n" + TIMESTEP_HTLP_URL + "\n"
-    ret = install.download_qai_hubmodel(TIMESTEP_EMBEDDING_MODEL_ID, time_embedding_model_path, desc=desc, fail=fail)
+    ret = install.download_qai_hubmodel(TIMESTEP_EMBEDDING_MODEL_ID, time_embedding_model_path, desc=desc, fail=fail, hub_id=install.HUB_ID_T)

    if not ret:
        if not os.path.exists(time_embedding_dir):  # There is no timestep_embedding data, exit process.
diff --git a/samples/python/stable_diffusion_v2_1/stable_diffusion_v2_1.py b/samples/python/stable_diffusion_v2_1/stable_diffusion_v2_1.py
index b846d02..846ad62 100644
--- a/samples/python/stable_diffusion_v2_1/stable_diffusion_v2_1.py
+++ b/samples/python/stable_diffusion_v2_1/stable_diffusion_v2_1.py
@@ -300,7 +300,7 @@ def model_download():
    desc = "Downloading timestep_embedding model... "
    fail = "\nFailed to download timestep_embedding model. Please prepare the timestep_embedding data according to the guide below:\n" + TIMESTEP_HTLP_URL + "\n"
-    ret = install.download_qai_hubmodel(TIMESTEP_EMBEDDING_MODEL_ID, time_embedding_model_path, desc=desc, fail=fail)
+    ret = install.download_qai_hubmodel(TIMESTEP_EMBEDDING_MODEL_ID, time_embedding_model_path, desc=desc, fail=fail, hub_id=install.HUB_ID_T)

    if not ret:
        if not os.path.exists(time_embedding_dir):  # There is no timestep_embedding data, exit process.
diff --git a/samples/python/unet_segmentation/README.md b/samples/python/unet_segmentation/README.md
index 39f6ad9..27766bb 100644
--- a/samples/python/unet_segmentation/README.md
+++ b/samples/python/unet_segmentation/README.md
@@ -9,11 +9,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\unet_segmentation\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\unet_segmentation\qnn\QnnHtp.dll
-C:\ai-hub\unet_segmentation\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\unet_segmentation\qnn\QnnSystem.dll
-C:\ai-hub\unet_segmentation\qnn\libqnnhtpv73.cat
+C:\ai-hub\unet_segmentation\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\unet_segmentation\qai_libs\QnnHtp.dll
+C:\ai-hub\unet_segmentation\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\unet_segmentation\qai_libs\QnnSystem.dll
+C:\ai-hub\unet_segmentation\qai_libs\libqnnhtpv73.cat
```

## unet_segmentation QNN models
diff --git a/samples/python/unet_segmentation/unet_segmentation.py b/samples/python/unet_segmentation/unet_segmentation.py
index 004b1a6..eae9065 100644
--- a/samples/python/unet_segmentation/unet_segmentation.py
+++ b/samples/python/unet_segmentation/unet_segmentation.py
@@ -109,7 +109,7 @@ def Init():
    global unetsegmentation

    # Config AppBuilder environment.
-    QNNConfig.Config(os.getcwd() + "\\qnn", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
+    QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for UnetSegmentation objects.
    unetsegmentation_model = "models\\unet_segmentation.bin"
diff --git a/samples/python/utils/install.py b/samples/python/utils/install.py
index ab2a066..7cf2549 100644
--- a/samples/python/utils/install.py
+++ b/samples/python/utils/install.py
@@ -30,15 +30,16 @@ TEXT_RUN_SCRIPT_AGAIN = "Then run this Python script again."
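# Two AI Hub API tokens are kept below; download_qai_hubmodel() selects one
# through its hub_id argument. HUB_ID_Q is the default, while the Stable
# Diffusion and Riffusion samples pass HUB_ID_T for their timestep_embedding
# models.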
QNN_SDK_ROOT="C:\\Qualcomm\\AIStack\\QAIRT\\"
-HUB_ID="aac24f12d047e7f558d8effe4b2fdad0f5c2c341"
+HUB_ID_T="aac24f12d047e7f558d8effe4b2fdad0f5c2c341"
+HUB_ID_Q="a916bc04400e033f60fdd73c615e5780e2ba206a"

QAI_HUB_CONFIG = os.path.join(Path.home(), ".qai_hub", "client.ini")
QAI_HUB_CONFIG_BACKUP = os.path.join(Path.home(), ".qai_hub", "client.ini.bk")

-def setup_qai_hub():
+def setup_qai_hub(hub_id):
    if os.path.isfile(QAI_HUB_CONFIG):
        shutil.copy(QAI_HUB_CONFIG, QAI_HUB_CONFIG_BACKUP)
-    run_command(f"qai-hub.exe configure --api_token {HUB_ID} > NUL", False)
+    run_command(f"qai-hub.exe configure --api_token {hub_id} > NUL", False)

def reset_qai_hub():
@@ -51,7 +52,7 @@ def is_file_exists(filepath):
        return True
    return False

-def download_qai_hubmodel(model_id, filepath, desc=None, fail=None):
+def download_qai_hubmodel(model_id, filepath, desc=None, fail=None, hub_id=HUB_ID_Q):
    ret = True

    if is_file_exists(filepath):
@@ -65,7 +66,7 @@
    else:
        print(f"Downloading {os.path.basename(filepath)}...")

-    setup_qai_hub()
+    setup_qai_hub(hub_id)

    try:
        model = qai_hub.get_model(model_id)
        model.download(filename=filepath)
@@ -279,13 +280,13 @@ def is_installed(package):
        import importlib.metadata
        import importlib.util
        dist = importlib.metadata.distribution(package)
-    except importlib.metadata.PackageNotFoundError:
+    except importlib.metadata.PackageNotFoundError as e:
+        # print(e)
        try:
            spec = importlib.util.find_spec(package)
        except ModuleNotFoundError:
-            return False
-
-        return spec is not None
+            return None
+        return None
    return dist
diff --git a/samples/python/yolov8_det/README.md b/samples/python/yolov8_det/README.md
index 55f2ab2..433bc15 100644
--- a/samples/python/yolov8_det/README.md
+++ b/samples/python/yolov8_det/README.md
@@ -9,11 +9,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\yolov8_det\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\yolov8_det\qnn\QnnHtp.dll
-C:\ai-hub\yolov8_det\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\yolov8_det\qnn\QnnSystem.dll
-C:\ai-hub\yolov8_det\qnn\libqnnhtpv73.cat
+C:\ai-hub\yolov8_det\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\yolov8_det\qai_libs\QnnHtp.dll
+C:\ai-hub\yolov8_det\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\yolov8_det\qai_libs\QnnSystem.dll
+C:\ai-hub\yolov8_det\qai_libs\libqnnhtpv73.cat
```

## yolov8_det QNN models
diff --git a/samples/python/yolov8_det/yolov8_det.py b/samples/python/yolov8_det/yolov8_det.py
index 8cfdb74..76131ce 100644
--- a/samples/python/yolov8_det/yolov8_det.py
+++ b/samples/python/yolov8_det/yolov8_det.py
@@ -288,7 +288,7 @@ def Init():
    global yolov8

    # Config AppBuilder environment.
-    QNNConfig.Config(os.getcwd() + "\\qnn", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
+    QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for YoloV8 objects.
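    # (QNNConfig.Config(lib_dir, Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
    # is the only AppBuilder setup these samples perform; Runtime.HTP selects the
    # NPU backend implemented by the QnnHtp libraries listed in the README above.)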
    yolov8_model = "models\\yolov8_det.bin"
diff --git a/src/Utils/BuildId.hpp b/src/Utils/BuildId.hpp
index 153786e..153e187 100644
--- a/src/Utils/BuildId.hpp
+++ b/src/Utils/BuildId.hpp
@@ -11,7 +11,7 @@
namespace qnn {
namespace tools {

-inline std::string getBuildId() { return std::string("v2.28.0.240827110523_99241"); }
+inline std::string getBuildId() { return std::string("v2.28.0.241029232508_102474"); }

} // namespace tools
} // namespace qnn
diff --git a/src/Utils/DataUtil.cpp b/src/Utils/DataUtil.cpp
index 659587e..4502728 100644
--- a/src/Utils/DataUtil.cpp
+++ b/src/Utils/DataUtil.cpp
@@ -15,9 +15,11 @@ #include
#include "DataUtil.hpp"
#include "Logger.hpp"
+#ifndef __hexagon__
#include "PAL/Directory.hpp"
#include "PAL/FileOp.hpp"
#include "PAL/Path.hpp"
+#endif

using namespace qnn;
using namespace qnn::tools;
@@ -186,6 +188,7 @@ datautil::StatusCode datautil::readBinaryFromFile(std::string filePath,
  return StatusCode::SUCCESS;
}

+#ifndef __hexagon__
datautil::StatusCode datautil::writeDataToFile(std::string fileDir,
                                               std::string fileName,
                                               std::vector<size_t> dims,
@@ -275,8 +278,9 @@ datautil::StatusCode datautil::writeBinaryToFile(std::string fileDir,
  os.write(reinterpret_cast<char*>(buffer), bufferSize);
  return StatusCode::SUCCESS;
}
+#endif

-
+// Enabling fp16 execution
static inline float datautil::fp16_ieee_to_fp32_value(uint16_t h) {
  const uint32_t w = (uint32_t)h << 16;
  const uint32_t sign = w & UINT32_C(0x80000000);
@@ -297,6 +301,7 @@ static inline float datautil::fp16_ieee_to_fp32_value(uint16_t h) {
  return fp32_from_bits(result);
}

+// Enabling fp16 execution
/*
 * Convert a 32-bit floating-point number in IEEE single-precision format to a 16-bit floating-point number in
 * IEEE half-precision format, in bit representation.
@@ -304,8 +309,6 @@ static inline float datautil::fp16_ieee_to_fp32_value(uint16_t h) {
 * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals)
 * floating-point operations and bitcasts between integer and floating-point variables.
 */
-
-
bool datautil::floatNToFloat32(float* out,
                               uint8_t* in,
                               size_t numElements,
@@ -338,6 +341,7 @@ bool datautil::floatNToFloat32(float* out,
  return true;
}

+// Enabling fp16 execution
static inline float datautil::fp32_from_bits(uint32_t w) {
#if defined(__OPENCL_VERSION__)
  return as_float(w);
@@ -356,6 +360,7 @@ static inline float datautil::fp32_from_bits(uint32_t w) {
#endif
}

+// Enabling fp16 execution
static inline uint32_t datautil::fp32_to_bits(float f) {
#if defined(__OPENCL_VERSION__)
  return as_uint(f);
@@ -374,6 +379,7 @@ static inline uint32_t datautil::fp32_to_bits(float f) {
#endif
}

+// Enabling fp16 execution
static inline uint16_t datautil::fp16_ieee_from_fp32_value(float f) {
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
  const float scale_to_inf = 0x1.0p+112f;
@@ -400,6 +406,7 @@ static inline uint16_t datautil::fp16_ieee_from_fp32_value(float f) {
  return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
}

+// Enabling fp16 execution
bool datautil::float32ToFloatN(uint8_t* out,
                               float* in,
                               size_t numElements,
@@ -514,6 +521,10 @@ template datautil::StatusCode datautil::castToFloat<uint32_t>(float* out,
                                                              uint32_t* in,
                                                              size_t numElements);

+template datautil::StatusCode datautil::castToFloat<uint64_t>(float* out,
+                                                              uint64_t* in,
+                                                              size_t numElements);
+
template datautil::StatusCode datautil::castToFloat<int8_t>(float* out,
                                                            int8_t* in,
                                                            size_t numElements);
@@ -526,6 +537,10 @@ template datautil::StatusCode datautil::castToFloat<int32_t>(float* out,
                                                             int32_t* in,
                                                             size_t numElements);

+template datautil::StatusCode datautil::castToFloat<int64_t>(float* out,
+                                                             int64_t* in,
+                                                             size_t numElements);
+
template <typename T_QuantType>
datautil::StatusCode datautil::castFromFloat(T_QuantType* out, float* in, size_t numElements) {
  if (nullptr == out || nullptr == in) {
@@ -550,6 +565,10 @@ template datautil::StatusCode datautil::castFromFloat<uint32_t>(uint32_t* out,
                                                                float* in,
                                                                size_t numElements);

+template datautil::StatusCode datautil::castFromFloat<uint64_t>(uint64_t* out,
+                                                                float* in,
+                                                                size_t numElements);
+
template datautil::StatusCode datautil::castFromFloat<int8_t>(int8_t* out,
                                                              float* in,
                                                              size_t numElements);
@@ -559,5 +578,9 @@ template datautil::StatusCode datautil::castFromFloat<int16_t>(int16_t* out,
                                                               size_t numElements);

template datautil::StatusCode datautil::castFromFloat<int32_t>(int32_t* out,
+                                                               float* in,
+                                                               size_t numElements);
+
+template datautil::StatusCode datautil::castFromFloat<int64_t>(int64_t* out,
                                                               float* in,
                                                               size_t numElements);
\ No newline at end of file
diff --git a/src/Utils/DataUtil.hpp b/src/Utils/DataUtil.hpp
index c030a86..7bbfd15 100644
--- a/src/Utils/DataUtil.hpp
+++ b/src/Utils/DataUtil.hpp
@@ -68,6 +68,7 @@ ReadBatchDataRetType_t readBatchData(const std::vector<std::string>& filePaths,
StatusCode readBinaryFromFile(std::string filePath, uint8_t* buffer, size_t bufferSize);

+#ifndef __hexagon__
StatusCode writeDataToFile(std::string fileDir,
                           std::string fileName,
                           std::vector<size_t> dims,
@@ -85,22 +86,15 @@ StatusCode writeBinaryToFile(std::string fileDir,
                             std::string fileName,
                             uint8_t* buffer,
                             size_t bufferSize);
+#endif

+// Enabling fp16 execution
static inline uint16_t fp16_ieee_from_fp32_value(float f);
static inline float fp16_ieee_to_fp32_value(uint16_t h);
-
static inline uint32_t fp32_to_bits(float f);
static inline float fp32_from_bits(uint32_t w);
-
-bool floatNToFloat32(float* out,
-                     uint8_t* in,
-                     size_t numElements,
-                     uint8_t bitWidth);
-
-bool float32ToFloatN(uint8_t* out,
-                     float* in,
-                     size_t numElements,
-                     uint8_t bitWidth);
+bool floatNToFloat32(float* out, uint8_t* in, size_t numElements, uint8_t bitWidth);
+bool float32ToFloatN(uint8_t* out, float* in, size_t numElements, uint8_t bitWidth);

template <typename T_QuantType>
datautil::StatusCode floatToTfN(
diff --git a/src/Utils/DynamicLoadUtil.cpp b/src/Utils/DynamicLoadUtil.cpp
index da6f122..cb04eba 100644
--- a/src/Utils/DynamicLoadUtil.cpp
+++ b/src/Utils/DynamicLoadUtil.cpp
@@ -39,13 +39,8 @@ dynamicloadutil::StatusCode dynamicloadutil::getQnnFunctionPointers(
    void** backendHandleRtn,
    bool loadModelLib,
    void** modelHandleRtn) {
-#if defined(__ANDROID__)
-  void* libBackendHandle = pal::dynamicloading::dlOpen(
-      backendPath.c_str(), pal::dynamicloading::DL_NOW | pal::dynamicloading::DL_LOCAL);
-#else
  void* libBackendHandle = pal::dynamicloading::dlOpen(
      backendPath.c_str(), pal::dynamicloading::DL_NOW | pal::dynamicloading::DL_GLOBAL);
-#endif
  if (nullptr == libBackendHandle) {
    QNN_ERROR("Unable to load backend. pal::dynamicloading::dlError(): %s",
              pal::dynamicloading::dlError());
diff --git a/src/Utils/IOTensor.cpp b/src/Utils/IOTensor.cpp
index ee9d6b2..c0ccebe 100644
--- a/src/Utils/IOTensor.cpp
+++ b/src/Utils/IOTensor.cpp
@@ -14,9 +14,11 @@ #include
#include "DataUtil.hpp"
#include "IOTensor.hpp"
#include "Logger.hpp"
+#ifndef __hexagon__
#include "PAL/Directory.hpp"
#include "PAL/FileOp.hpp"
#include "PAL/Path.hpp"
+#endif
#include "PAL/StringOp.hpp"
#include "QnnTypeMacros.hpp"

@@ -71,7 +73,7 @@ iotensor::StatusCode iotensor::IOTensor::copyFromFloatToNative(float* floatBuffer,
  fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(tensor), QNN_TENSOR_GET_RANK(tensor));

  switch (QNN_TENSOR_GET_DATA_TYPE(tensor)) {
-    case QNN_DATATYPE_FLOAT_16:
+    case QNN_DATATYPE_FLOAT_16:  // zw. Enabling fp16 execution
#ifdef __hexagon__
      QNN_ERROR("failure in aiswutility::float32ToFloatN, not supported on Hexagon");
      returnStatus = StatusCode::FAILURE;
@@ -135,6 +137,17 @@ iotensor::StatusCode iotensor::IOTensor::copyFromFloatToNative(float* floatBuffer,
      }
      break;

+    case QNN_DATATYPE_UINT_64:
+      if (datautil::StatusCode::SUCCESS !=
+          datautil::castFromFloat<uint64_t>(
+              static_cast<uint64_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data),
+              floatBuffer,
+              datautil::calculateElementCount(dims))) {
+        QNN_ERROR("failure in castFromFloat");
+        returnStatus = StatusCode::FAILURE;
+      }
+      break;
+
    case QNN_DATATYPE_INT_8:
      if (datautil::StatusCode::SUCCESS !=
          datautil::castFromFloat<int8_t>(
              static_cast<int8_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data),
@@ -168,6 +181,17 @@ iotensor::StatusCode iotensor::IOTensor::copyFromFloatToNative(float* floatBuffer,
      }
      break;

+    case QNN_DATATYPE_INT_64:
+      if (datautil::StatusCode::SUCCESS !=
+          datautil::castFromFloat<int64_t>(
+              static_cast<int64_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data),
+              floatBuffer,
+              datautil::calculateElementCount(dims))) {
+        QNN_ERROR("failure in castFromFloat");
+        returnStatus = StatusCode::FAILURE;
+      }
+      break;
+
    case QNN_DATATYPE_BOOL_8:
      if (datautil::StatusCode::SUCCESS !=
          datautil::castFromFloat<uint8_t>(
@@ -314,6 +338,7 @@ iotensor::PopulateInputTensorsRetType_t iotensor::IOTensor::populateInputTensors(
  return {StatusCode::SUCCESS, numFilesPopulated, numBatchSize};
}

+// zw. Optimize performance.
// Helper method to populate an input tensor in the graph during execution.
// It relies on reading data from buffer provided during executeGraph() call.
iotensor::StatusCode iotensor::IOTensor::populateInputTensor(
@@ -345,6 +370,7 @@ iotensor::StatusCode iotensor::IOTensor::populateInputTensor(
  return StatusCode::SUCCESS;
}

+// zw. Optimize performance.
// Helper method to populate all input tensors.
iotensor::StatusCode iotensor::IOTensor::populateInputTensors(
    uint32_t graphIdx,
@@ -374,6 +400,7 @@ iotensor::StatusCode iotensor::IOTensor::populateInputTensors(
  return StatusCode::SUCCESS;
}

+// zw. Optimize performance.
iotensor::StatusCode iotensor::IOTensor::getTensorsSize(Qnn_Tensor_t** tensors, uint32_t tensorCount, Qnn_Tensor_t* tensorWrappers, std::vector<size_t>& size) {
  if (nullptr == tensorWrappers) {
    QNN_ERROR("tensorWrappers is nullptr");
@@ -542,7 +569,7 @@ iotensor::StatusCode iotensor::IOTensor::allocateBuffer(uint8_t** buffer,
  size_t elementCount = datautil::calculateElementCount(dims);
  auto returnStatus   = StatusCode::SUCCESS;
  switch (dataType) {
-    case QNN_DATATYPE_FLOAT_16:
+    case QNN_DATATYPE_FLOAT_16:  // zw. Enabling fp16 execution
    case QNN_DATATYPE_FLOAT_32:
      QNN_DEBUG("allocating float buffer");
      returnStatus = allocateBuffer<float>(reinterpret_cast<float**>(buffer), elementCount);
      break;
@@ -565,6 +592,11 @@ iotensor::StatusCode iotensor::IOTensor::allocateBuffer(uint8_t** buffer,
      returnStatus = allocateBuffer<uint32_t>(reinterpret_cast<uint32_t**>(buffer), elementCount);
      break;

+    case QNN_DATATYPE_UINT_64:
+      QNN_DEBUG("allocating uint64_t buffer");
+      returnStatus = allocateBuffer<uint64_t>(reinterpret_cast<uint64_t**>(buffer), elementCount);
+      break;
+
    case QNN_DATATYPE_INT_8:
      QNN_DEBUG("allocating int8_t buffer");
      returnStatus = allocateBuffer<int8_t>(reinterpret_cast<int8_t**>(buffer), elementCount);
      break;
@@ -580,6 +612,11 @@ iotensor::StatusCode iotensor::IOTensor::allocateBuffer(uint8_t** buffer,
      returnStatus = allocateBuffer<int32_t>(reinterpret_cast<int32_t**>(buffer), elementCount);
      break;

+    case QNN_DATATYPE_INT_64:
+      QNN_DEBUG("allocating int64_t buffer");
+      returnStatus = allocateBuffer<int64_t>(reinterpret_cast<int64_t**>(buffer), elementCount);
+      break;
+
    case QNN_DATATYPE_BOOL_8:
      QNN_DEBUG("allocating bool buffer");
      returnStatus = allocateBuffer<uint8_t>(reinterpret_cast<uint8_t**>(buffer), elementCount);
      break;
@@ -611,6 +648,7 @@ iotensor::StatusCode iotensor::IOTensor::allocateBuffer(T** buffer, size_t& elementCount) {
// Convert data to float or de-quantization. This is used when
// user requests for float output and the model produces
// non-float output.
+#ifndef __hexagon__
iotensor::StatusCode iotensor::IOTensor::convertToFloat(float** out, Qnn_Tensor_t* tensor) {
  if (nullptr == tensor) {
    QNN_ERROR("tensors is nullptr");
@@ -630,7 +668,7 @@ iotensor::StatusCode iotensor::IOTensor::convertToFloat(float** out, Qnn_Tensor_t* tensor) {
    return returnStatus;
  }
  switch (QNN_TENSOR_GET_DATA_TYPE(tensor)) {
-    case QNN_DATATYPE_FLOAT_16:
+    case QNN_DATATYPE_FLOAT_16:  // zw. Enabling fp16 execution
      if (!datautil::floatNToFloat32(
              *out, reinterpret_cast<uint8_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), elementCount, 16)) {
        QNN_ERROR("failure in aiswutility::floatNToFloat32");
@@ -697,6 +735,17 @@ iotensor::StatusCode iotensor::IOTensor::convertToFloat(float** out, Qnn_Tensor_t* tensor) {
      }
      break;

+    case QNN_DATATYPE_UINT_64:
+      if (datautil::StatusCode::SUCCESS !=
+          datautil::castToFloat<uint64_t>(
+              *out,
+              reinterpret_cast<uint64_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data),
+              elementCount)) {
+        QNN_ERROR("failure in castToFloat");
+        returnStatus = StatusCode::FAILURE;
+      }
+      break;
+
    case QNN_DATATYPE_INT_8:
      if (datautil::StatusCode::SUCCESS !=
          datautil::castToFloat<int8_t>(
@@ -730,6 +779,17 @@ iotensor::StatusCode iotensor::IOTensor::convertToFloat(float** out, Qnn_Tensor_t* tensor) {
      }
      break;

+    case QNN_DATATYPE_INT_64:
+      if (datautil::StatusCode::SUCCESS !=
+          datautil::castToFloat<int64_t>(
+              *out,
+              reinterpret_cast<int64_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data),
+              elementCount)) {
+        QNN_ERROR("failure in castToFloat");
+        returnStatus = StatusCode::FAILURE;
+      }
+      break;
+
    case QNN_DATATYPE_BOOL_8:
      if (datautil::StatusCode::SUCCESS !=
          datautil::castToFloat<uint8_t>(
@@ -888,6 +948,7 @@ iotensor::StatusCode iotensor::IOTensor::writeOutputTensors(uint32_t graphIdx,
  }
  return returnStatus;
}
+#endif

// Helper method to allocate a buffer and copy data to it.
iotensor::StatusCode iotensor::IOTensor::allocateAndCopyBuffer(uint8_t** buffer,
diff --git a/src/Utils/IOTensor.hpp b/src/Utils/IOTensor.hpp
index 868e2c3..255401b 100644
--- a/src/Utils/IOTensor.hpp
+++ b/src/Utils/IOTensor.hpp
@@ -40,6 +40,7 @@ class IOTensor {
                            Qnn_Tensor_t **outputs,
                            qnn_wrapper_api::GraphInfo_t graphInfo);

+#ifndef __hexagon__
  StatusCode writeOutputTensors(uint32_t graphIdx,
                                size_t startIdx,
                                char *graphName,
@@ -50,6 +51,7 @@ class IOTensor {
                                std::string outputPath,
                                size_t numInputFilesPopulated,
                                size_t outputBatchSize);
+#endif

  PopulateInputTensorsRetType_t populateInputTensors(
      uint32_t graphIdx,
@@ -61,6 +63,7 @@ class IOTensor {
      qnn_wrapper_api::GraphInfo_t graphInfo,
      iotensor::InputDataType inputDataType);

+  // zw. Optimize performance.
  StatusCode populateInputTensors(uint32_t graphIdx,
                                  std::vector<uint8_t *> inputBuffers,
                                  Qnn_Tensor_t *inputs,
@@ -72,11 +75,13 @@ class IOTensor {
                                       size_t numInputTensors,
                                       size_t numOutputTensors);

+#ifndef __hexagon__
  StatusCode convertToFloat(float **out, Qnn_Tensor_t *output);  // zw: change it to public function.
+#endif

  StatusCode fillDims(std::vector<uint32_t> &dims, uint32_t *inDimensions, uint32_t rank);  // zw: change it to public function.

-  StatusCode getTensorsSize(Qnn_Tensor_t** tensors, uint32_t tensorCount, Qnn_Tensor_t* tensorWrappers, std::vector<size_t>& size);
+  StatusCode getTensorsSize(Qnn_Tensor_t** tensors, uint32_t tensorCount, Qnn_Tensor_t* tensorWrappers, std::vector<size_t>& size);  // zw. Optimize performance.

 private:
  PopulateInputTensorsRetType_t populateInputTensor(const std::vector<std::string> &filePaths,
@@ -85,7 +90,7 @@ class IOTensor {
                                                    Qnn_Tensor_t *input,
                                                    InputDataType inputDataType);

-  StatusCode populateInputTensor(uint8_t *buffer, Qnn_Tensor_t *input, InputDataType inputDataType);
+  StatusCode populateInputTensor(uint8_t *buffer, Qnn_Tensor_t *input, InputDataType inputDataType);  // zw. Optimize performance.

  PopulateInputTensorsRetType_t readDataAndAllocateBuffer(const std::vector<std::string> &filePaths,
                                                          const size_t filePathsIndexOffset,
@@ -97,6 +102,7 @@ class IOTensor {
  template <typename T>
  StatusCode allocateBuffer(T **buffer, size_t &elementCount);

+#ifndef __hexagon__
  StatusCode convertAndWriteOutputTensorInFloat(Qnn_Tensor_t *output,
                                                std::vector<std::string> outputPaths,
                                                std::string fileName,
@@ -106,6 +112,7 @@ class IOTensor {
                                 std::vector<std::string> outputPaths,
                                 std::string fileName,
                                 size_t outputBatchSize);
+#endif

  StatusCode allocateAndCopyBuffer(uint8_t **buffer, Qnn_Tensor_t *tensor);
@@ -120,4 +127,4 @@ class IOTensor {
};
} // namespace iotensor
} // namespace tools
-} // namespace qnn
+} // namespace qnn
\ No newline at end of file
diff --git a/src/Utils/QnnSampleAppUtils.cpp b/src/Utils/QnnSampleAppUtils.cpp
index 55b8432..b2672d2 100644
--- a/src/Utils/QnnSampleAppUtils.cpp
+++ b/src/Utils/QnnSampleAppUtils.cpp
@@ -15,9 +15,11 @@ #include
#include "Logger.hpp"
+#ifndef __hexagon__
#include "PAL/Directory.hpp"
#include "PAL/FileOp.hpp"
#include "PAL/Path.hpp"
+#endif
#include "PAL/StringOp.hpp"
#include "QnnSampleAppUtils.hpp"
#include "QnnTypeMacros.hpp"

@@ -224,7 +226,16 @@ bool sample_app::deepCopyQnnTensorInfo(Qnn_Tensor_t *dst, const Qnn_Tensor_t *src) {
                           QNN_TENSOR_GET_DIMENSIONS(src),
                           QNN_TENSOR_GET_RANK(src) * sizeof(uint32_t));
  }
+  if (QNN_TENSOR_GET_IS_DYNAMIC_DIMENSIONS(src)) {
+    QNN_TENSOR_SET_IS_DYNAMIC_DIMENSIONS(
+        dst, (uint8_t *)malloc(QNN_TENSOR_GET_RANK(src) * sizeof(uint8_t)));
+    pal::StringOp::memscpy(QNN_TENSOR_GET_IS_DYNAMIC_DIMENSIONS(dst),
+                           QNN_TENSOR_GET_RANK(src) * sizeof(uint8_t),
+                           QNN_TENSOR_GET_IS_DYNAMIC_DIMENSIONS(src),
+                           QNN_TENSOR_GET_RANK(src) * sizeof(uint8_t));
+  }
  }
+  QNN_TENSOR_SET_SPARSE_PARAMS(dst, QNN_TENSOR_GET_SPARSE_PARAMS(src));

  return true;
}
@@ -249,41 +260,41 @@ bool sample_app::copyTensorsInfo(const Qnn_Tensor_t *tensorsInfoSrc,
  return returnStatus;
}

-bool sample_app::copyGraphsInfoV3(const QnnSystemContext_GraphInfoV3_t* graphInfoSrc,
-                                  qnn_wrapper_api::GraphInfo_t* graphInfoDst) {
-    graphInfoDst->graphName = nullptr;
-    if (graphInfoSrc->graphName) {
-      graphInfoDst->graphName =
-          pal::StringOp::strndup(graphInfoSrc->graphName, strlen(graphInfoSrc->graphName));
-    }
-    graphInfoDst->inputTensors    = nullptr;
-    graphInfoDst->numInputTensors = 0;
-    if (graphInfoSrc->graphInputs) {
-      if (!copyTensorsInfo(
+bool sample_app::copyGraphsInfoV1(const QnnSystemContext_GraphInfoV1_t *graphInfoSrc,
+                                  qnn_wrapper_api::GraphInfo_t *graphInfoDst) {
+  graphInfoDst->graphName = nullptr;
+  if (graphInfoSrc->graphName) {
+    graphInfoDst->graphName =
+        pal::StringOp::strndup(graphInfoSrc->graphName, strlen(graphInfoSrc->graphName));
+  }
+  graphInfoDst->inputTensors    = nullptr;
+  graphInfoDst->numInputTensors = 0;
+  if (graphInfoSrc->graphInputs) {
+    if (!copyTensorsInfo(
            graphInfoSrc->graphInputs, graphInfoDst->inputTensors, graphInfoSrc->numGraphInputs)) {
-        return false;
-      }
-      graphInfoDst->numInputTensors = graphInfoSrc->numGraphInputs;
+      return false;
    }
-    graphInfoDst->outputTensors    = nullptr;
-    graphInfoDst->numOutputTensors = 0;
-    if (graphInfoSrc->graphOutputs) {
-      if (!copyTensorsInfo(graphInfoSrc->graphOutputs,
-                           graphInfoDst->outputTensors,
-                           graphInfoSrc->numGraphOutputs)) {
-        return false;
-      }
-      graphInfoDst->numOutputTensors = graphInfoSrc->numGraphOutputs;
+    graphInfoDst->numInputTensors = graphInfoSrc->numGraphInputs;
+  }
+  graphInfoDst->outputTensors    = nullptr;
+  graphInfoDst->numOutputTensors = 0;
+  if (graphInfoSrc->graphOutputs) {
+    if (!copyTensorsInfo(graphInfoSrc->graphOutputs,
+                         graphInfoDst->outputTensors,
+                         graphInfoSrc->numGraphOutputs)) {
+      return false;
    }
-    return true;
+    graphInfoDst->numOutputTensors = graphInfoSrc->numGraphOutputs;
+  }
+  return true;
}

-bool sample_app::copyGraphsInfoV1(const QnnSystemContext_GraphInfoV1_t *graphInfoSrc,
+bool sample_app::copyGraphsInfoV3(const QnnSystemContext_GraphInfoV3_t *graphInfoSrc,
                                  qnn_wrapper_api::GraphInfo_t *graphInfoDst) {
  graphInfoDst->graphName = nullptr;
  if (graphInfoSrc->graphName) {
    graphInfoDst->graphName =
-      pal::StringOp::strndup(graphInfoSrc->graphName, strlen(graphInfoSrc->graphName));
+        pal::StringOp::strndup(graphInfoSrc->graphName, strlen(graphInfoSrc->graphName));
  }
  graphInfoDst->inputTensors    = nullptr;
  graphInfoDst->numInputTensors = 0;
@@ -329,9 +340,8 @@ bool sample_app::copyGraphsInfo(const QnnSystemContext_GraphInfo_t *graphsInput,
    QNN_DEBUG("Extracting graphsInfo for graph Idx: %d", gIdx);
    if (graphsInput[gIdx].version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_1) {
      copyGraphsInfoV1(&graphsInput[gIdx].graphInfoV1, &graphInfoArr[gIdx]);
-    }
-    else if (graphsInput[gIdx].version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3) {
-        copyGraphsInfoV3(&graphsInput[gIdx].graphInfoV3, &graphInfoArr[gIdx]);
+    } else if (graphsInput[gIdx].version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3) {
+      copyGraphsInfoV3(&graphsInput[gIdx].graphInfoV3, &graphInfoArr[gIdx]);
    }
    graphsInfo[gIdx] = graphInfoArr + gIdx;
  }
@@ -390,8 +400,7 @@ bool sample_app::copyMetadataToGraphsInfo(const QnnSystemContext_BinaryInfo_t *binaryInfo,
      graphsCount = binaryInfo->contextBinaryInfoV2.numGraphs;
      return true;
    }
-  }
-  else if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3) {
+  } else if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3) {
    if (binaryInfo->contextBinaryInfoV3.graphs) {
      if (!copyGraphsInfo(binaryInfo->contextBinaryInfoV3.graphs,
                          binaryInfo->contextBinaryInfoV3.numGraphs,