diff --git a/samples/python/README.md b/samples/python/README.md
index e6970cc..73bb0b7 100644
--- a/samples/python/README.md
+++ b/samples/python/README.md
@@ -55,5 +55,7 @@ python stable_diffusion_v2_1\stable_diffusion_v2_1.py --prompt "spectacular view
| stable_diffusion_v1_5 | 2.24 | python stable_diffusion_v1_5\stable_diffusion_v1_5.py --prompt "the prompt string ..." |
| stable_diffusion_v2_1 | 2.24 | python stable_diffusion_v2_1\stable_diffusion_v2_1.py --prompt "the prompt string ..." |
| riffusion | 2.24 | python riffusion\riffusion.py --prompt "the prompt string ..." |
+| real_esrgan_x4plus | 2.28 | python real_esrgan_x4plus\real_esrgan_x4plus.py |
+| real_esrgan_general_x4v3 | 2.28 | python real_esrgan_general_x4v3\real_esrgan_general_x4v3.py |

*More models will be supported soon!*
diff --git a/samples/python/aotgan/README.md b/samples/python/aotgan/README.md
index 1b5cf56..4743596 100644
--- a/samples/python/aotgan/README.md
+++ b/samples/python/aotgan/README.md
@@ -9,11 +9,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\aotgan\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\aotgan\qnn\QnnHtp.dll
-C:\ai-hub\aotgan\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\aotgan\qnn\QnnSystem.dll
-C:\ai-hub\aotgan\qnn\libqnnhtpv73.cat
+C:\ai-hub\aotgan\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\aotgan\qai_libs\QnnHtp.dll
+C:\ai-hub\aotgan\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\aotgan\qai_libs\QnnSystem.dll
+C:\ai-hub\aotgan\qai_libs\libqnnhtpv73.cat
```

## aotgan QNN models
diff --git a/samples/python/aotgan/aotgan.py b/samples/python/aotgan/aotgan.py
index 8b1786a..3510ec5 100644
--- a/samples/python/aotgan/aotgan.py
+++ b/samples/python/aotgan/aotgan.py
@@ -70,7 +70,7 @@ def Init():
    global aotgan

    # Config AppBuilder environment.
-    QNNConfig.Config(os.getcwd() + "\\qnn", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
+    QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for AotGan objects.
    aotgan_model = "models\\aotgan.bin"
diff --git a/samples/python/docs/guide.md b/samples/python/docs/guide.md
index 33316b7..afbdce6 100644
--- a/samples/python/docs/guide.md
+++ b/samples/python/docs/guide.md
@@ -17,13 +17,11 @@ C:\ai-hub\model_name\
Set up a new folder called `qai_libs` and copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\model_name\qnn\libqnnhtpv73.cat
-C:\ai-hub\model_name\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\model_name\qnn\QnnCpu.dll
-C:\ai-hub\model_name\qnn\QnnHtp.dll
-C:\ai-hub\model_name\qnn\QnnHtpPrepare.dll
-C:\ai-hub\model_name\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\model_name\qnn\QnnSystem.dll
+C:\ai-hub\model_name\qai_libs\libqnnhtpv73.cat
+C:\ai-hub\model_name\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\model_name\qai_libs\QnnHtp.dll
+C:\ai-hub\model_name\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\model_name\qai_libs\QnnSystem.dll
```

## Prepare the QNN model
@@ -174,7 +172,7 @@ def Init():
    global lamadilated

    # Config AppBuilder environment.
-    QNNConfig.Config(os.getcwd() + "\\qnn", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
+    QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for LamaDilated objects.
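    # (The class name and .bin path below are this guide's running example.
    # Every sample's Init() follows the same shape: point QNNConfig.Config()
    # at the folder holding the QNN runtime libraries, then construct the
    # model object from its .bin path under 'models'.)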
    lamadilated_model = "models\\lama_dilated.bin"
diff --git a/samples/python/fastsam_x/README.md b/samples/python/fastsam_x/README.md
index 39f6ad9..27766bb 100644
--- a/samples/python/fastsam_x/README.md
+++ b/samples/python/fastsam_x/README.md
@@ -9,11 +9,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\unet_segmentation\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\unet_segmentation\qnn\QnnHtp.dll
-C:\ai-hub\unet_segmentation\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\unet_segmentation\qnn\QnnSystem.dll
-C:\ai-hub\unet_segmentation\qnn\libqnnhtpv73.cat
+C:\ai-hub\fastsam_x\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\fastsam_x\qai_libs\QnnHtp.dll
+C:\ai-hub\fastsam_x\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\fastsam_x\qai_libs\QnnSystem.dll
+C:\ai-hub\fastsam_x\qai_libs\libqnnhtpv73.cat
```

## unet_segmentation QNN models
diff --git a/samples/python/fastsam_x/fastsam_x.py b/samples/python/fastsam_x/fastsam_x.py
index cfa6e47..cde1020 100644
--- a/samples/python/fastsam_x/fastsam_x.py
+++ b/samples/python/fastsam_x/fastsam_x.py
@@ -151,7 +151,7 @@ def Init():
    global fastsam

    # Config AppBuilder environment.
-    QNNConfig.Config(os.getcwd() + "\\qnn", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
+    QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for FastSam_x objects.
    fastsam_model = "models\\fastsam_x.bin"
diff --git a/samples/python/inception_v3/README.md b/samples/python/inception_v3/README.md
index 1b0873d..3dcfd95 100644
--- a/samples/python/inception_v3/README.md
+++ b/samples/python/inception_v3/README.md
@@ -10,11 +10,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\inception_v3\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\inception_v3\qnn\QnnHtp.dll
-C:\ai-hub\inception_v3\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\inception_v3\qnn\QnnSystem.dll
-C:\ai-hub\inception_v3\qnn\libqnnhtpv73.cat
+C:\ai-hub\inception_v3\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\inception_v3\qai_libs\QnnHtp.dll
+C:\ai-hub\inception_v3\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\inception_v3\qai_libs\QnnSystem.dll
+C:\ai-hub\inception_v3\qai_libs\libqnnhtpv73.cat
```

## inception_v3 QNN models
diff --git a/samples/python/inception_v3/inception_v3.py b/samples/python/inception_v3/inception_v3.py
index ef93dfa..4fa6e06 100644
--- a/samples/python/inception_v3/inception_v3.py
+++ b/samples/python/inception_v3/inception_v3.py
@@ -50,7 +50,7 @@ def Init():
    global inceptionV3

    # Config AppBuilder environment.
-    QNNConfig.Config(os.getcwd() + "\\qnn", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
+    QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for InceptionV3 objects.
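    # (Note that QNNConfig.Config() resolves qai_libs against os.getcwd(),
    # so each sample script assumes it is launched from its own folder.)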
    inceptionV3_model = "models\\inception_v3.bin"
diff --git a/samples/python/lama_dilated/README.md b/samples/python/lama_dilated/README.md
index 1a78f9d..4a15674 100644
--- a/samples/python/lama_dilated/README.md
+++ b/samples/python/lama_dilated/README.md
@@ -9,11 +9,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\lama_dilated\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\lama_dilated\qnn\QnnHtp.dll
-C:\ai-hub\lama_dilated\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\lama_dilated\qnn\QnnSystem.dll
-C:\ai-hub\lama_dilated\qnn\libqnnhtpv73.cat
+C:\ai-hub\lama_dilated\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\lama_dilated\qai_libs\QnnHtp.dll
+C:\ai-hub\lama_dilated\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\lama_dilated\qai_libs\QnnSystem.dll
+C:\ai-hub\lama_dilated\qai_libs\libqnnhtpv73.cat
```

## lama_dilated QNN models
diff --git a/samples/python/lama_dilated/lama_dilated.py b/samples/python/lama_dilated/lama_dilated.py
index 5629db2..0f4c6da 100644
--- a/samples/python/lama_dilated/lama_dilated.py
+++ b/samples/python/lama_dilated/lama_dilated.py
@@ -70,7 +70,7 @@ def Init():
    global lamadilated

    # Config AppBuilder environment.
-    QNNConfig.Config(os.getcwd() + "\\qnn", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
+    QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for LamaDilated objects.
    lamadilated_model = "models\\lama_dilated.bin"
diff --git a/samples/python/openpose/README.md b/samples/python/openpose/README.md
index bb1e0d0..dd9057e 100644
--- a/samples/python/openpose/README.md
+++ b/samples/python/openpose/README.md
@@ -9,11 +9,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\openpose\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\openpose\qnn\QnnHtp.dll
-C:\ai-hub\openpose\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\openpose\qnn\QnnSystem.dll
-C:\ai-hub\openpose\qnn\libqnnhtpv73.cat
+C:\ai-hub\openpose\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\openpose\qai_libs\QnnHtp.dll
+C:\ai-hub\openpose\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\openpose\qai_libs\QnnSystem.dll
+C:\ai-hub\openpose\qai_libs\libqnnhtpv73.cat
```

## openpose QNN models
diff --git a/samples/python/openpose/openpose.py b/samples/python/openpose/openpose.py
index 6f18cd1..2b4eb06 100644
--- a/samples/python/openpose/openpose.py
+++ b/samples/python/openpose/openpose.py
@@ -427,7 +427,7 @@ def Init():
    global openpose

    # Config AppBuilder environment.
-    QNNConfig.Config(os.getcwd() + "\\qnn", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
+    QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for OpenPose objects.
    openpose_model = "models\\openpose.bin"
diff --git a/samples/python/real_esrgan_general_x4v3/README.md b/samples/python/real_esrgan_general_x4v3/README.md
index 9fcbe4e..7a1bd8f 100644
--- a/samples/python/real_esrgan_general_x4v3/README.md
+++ b/samples/python/real_esrgan_general_x4v3/README.md
@@ -9,11 +9,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\real_esrgan_general_x4v3\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\real_esrgan_general_x4v3\qnn\QnnHtp.dll
-C:\ai-hub\real_esrgan_general_x4v3\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\real_esrgan_general_x4v3\qnn\QnnSystem.dll
-C:\ai-hub\real_esrgan_general_x4v3\qnn\libqnnhtpv73.cat
+C:\ai-hub\real_esrgan_general_x4v3\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\real_esrgan_general_x4v3\qai_libs\QnnHtp.dll
+C:\ai-hub\real_esrgan_general_x4v3\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\real_esrgan_general_x4v3\qai_libs\QnnSystem.dll
+C:\ai-hub\real_esrgan_general_x4v3\qai_libs\libqnnhtpv73.cat
```

## real_esrgan_general_x4v3 QNN models
@@ -25,10 +25,9 @@ You can refer to the links below on how to set up the AI Hub environment and how to use the AI Hub API:
https://aihub.qualcomm.com/get-started
http://app.aihub.qualcomm.com/docs/

-a. Download the latest 'ai-hub-models' code and install it to Python environment:
+a. Install the AI Hub Python packages:
```
-git clone --recursive https://github.com/quic/ai-hub-models.git
-pip install -e .
+pip install qai-hub qai_hub_models
```

b. Use the command below to generate a QNN model that supports 512x512 input resolution:
@@ -45,7 +44,7 @@ Scheduled compile job (j1p86jxog) successfully. To see the status and results:
```
After downloading the model, copy it to the following path:
```
-C:\ai-hub\real_esrgan_general_x4v3\models\real_esrgan_general_x4v3_512.bin
+C:\ai-hub\real_esrgan_general_x4v3\models\real_esrgan_general_x4v3.bin
```

## Run the sample code
diff --git a/samples/python/real_esrgan_general_x4v3/real_esrgan_general_x4v3.py b/samples/python/real_esrgan_general_x4v3/real_esrgan_general_x4v3.py
index acf7ed9..f27e7cb 100644
--- a/samples/python/real_esrgan_general_x4v3/real_esrgan_general_x4v3.py
+++ b/samples/python/real_esrgan_general_x4v3/real_esrgan_general_x4v3.py
@@ -3,10 +3,13 @@
# SPDX-License-Identifier: BSD-3-Clause
# ---------------------------------------------------------------------
+import sys
import os
+sys.path.append(".")
+sys.path.append("..")
+import utils.install as install
import cv2
import numpy as np
-
import torch
import torchvision.transforms as transforms
from PIL import Image
@@ -16,17 +19,29 @@

####################################################################

+MODEL_ID = "mn0j68p8m"
+MODEL_NAME = "real_esrgan_general_x4v3"
+MODEL_HELP_URL = "https://github.com/quic/ai-engine-direct-helper/tree/main/samples/python/" + MODEL_NAME + "#" + MODEL_NAME + "-qnn-models"
+IMAGE_SIZE = 512
+
+####################################################################
+
execution_ws = os.getcwd()
-qnn_dir = execution_ws + "\\qnn"
+qnn_dir = execution_ws + "\\qai_libs"
+
+if MODEL_NAME not in execution_ws:
+    execution_ws = execution_ws + "\\" + MODEL_NAME
+
+model_dir = execution_ws + "\\models"
+model_path = model_dir + "\\" + MODEL_NAME + ".bin"

-image_size = 512
image_buffer = None
realesrgan = None

def preprocess_PIL_image(image: Image) -> torch.Tensor:
    """Convert a PIL image into a pyTorch tensor with range [0, 1] and shape NCHW."""
-    transform = transforms.Compose([transforms.Resize(image_size),  # bgr image
-                                    transforms.CenterCrop(image_size),
+    transform = transforms.Compose([transforms.Resize(IMAGE_SIZE),  # bgr image
+                                    transforms.CenterCrop(IMAGE_SIZE),
                                    transforms.PILToTensor()])
    img: torch.Tensor = transform(image)  # type: ignore
    img = img.float() / 255.0  # int 0 - 255 to float 0.0 - 1.0
@@ -47,15 +62,26 @@ def Inference(self, input_data):
        output_data = super().Inference(input_datas)[0]
        return output_data

+def model_download():
+    ret = True
+
+    desc = f"Downloading {MODEL_NAME} model... "
+    fail = f"\nFailed to download {MODEL_NAME} model. Please prepare the model according to the steps at the link below:\n{MODEL_HELP_URL}"
+    ret = install.download_qai_hubmodel(MODEL_ID, model_path, desc=desc, fail=fail)
+
+    if not ret:
+        exit()
+
def Init():
    global realesrgan

+    model_download()
+
    # Config AppBuilder environment.
    QNNConfig.Config(qnn_dir, Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for RealESRGan objects.
-    realesrgan_model = "models\\real_esrgan_general_x4v3_512.bin"
-    realesrgan = RealESRGan("realesrgan", realesrgan_model)
+    realesrgan = RealESRGan("realesrgan", model_path)

def Inference(input_image_path, output_image_path):
    global image_buffer
@@ -75,11 +101,12 @@ def Inference(input_image_path, output_image_path):
    PerfProfile.RelPerfProfileGlobal()

    output_image = torch.from_numpy(output_image)
-    output_image = output_image.reshape(image_size * 4, image_size * 4, 3)
+    output_image = output_image.reshape(IMAGE_SIZE * 4, IMAGE_SIZE * 4, 3)
    output_image = torch.unsqueeze(output_image, 0)
    output_image = [torch_tensor_to_PIL_image(img) for img in output_image]
    image_buffer = output_image[0]
    image_buffer.save(output_image_path)
+    image_buffer.show()

def Release():
    global realesrgan
@@ -90,7 +117,7 @@

Init()

-Inference("input.png", "output.png")
+Inference(execution_ws + "\\input.png", execution_ws + "\\output.png")

Release()
diff --git a/samples/python/real_esrgan_x4plus/README.md b/samples/python/real_esrgan_x4plus/README.md
index d167469..fb7f500 100644
--- a/samples/python/real_esrgan_x4plus/README.md
+++ b/samples/python/real_esrgan_x4plus/README.md
@@ -9,11 +9,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\real_esrgan_x4plus\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\real_esrgan_x4plus\qnn\QnnHtp.dll
-C:\ai-hub\real_esrgan_x4plus\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\real_esrgan_x4plus\qnn\QnnSystem.dll
-C:\ai-hub\real_esrgan_x4plus\qnn\libqnnhtpv73.cat
+C:\ai-hub\real_esrgan_x4plus\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\real_esrgan_x4plus\qai_libs\QnnHtp.dll
+C:\ai-hub\real_esrgan_x4plus\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\real_esrgan_x4plus\qai_libs\QnnSystem.dll
+C:\ai-hub\real_esrgan_x4plus\qai_libs\libqnnhtpv73.cat
```

## real_esrgan_x4plus QNN models
@@ -25,10 +25,9 @@ You can refer to the links below on how to set up the AI Hub environment and how to use the AI Hub API:
https://aihub.qualcomm.com/get-started
http://app.aihub.qualcomm.com/docs/

-a. Download the latest 'ai-hub-models' code and install it to Python environment:
+a. Install the AI Hub Python packages:
```
-git clone --recursive https://github.com/quic/ai-hub-models.git
-pip install -e .
+pip install qai-hub qai_hub_models
```

b. Use the command below to generate a QNN model that supports 512x512 input resolution:
@@ -45,7 +44,7 @@ Scheduled compile job (j1p86jxog) successfully. To see the status and results:
```
After downloading the model, copy it to the following path:
```
-C:\ai-hub\real_esrgan_x4plus\models\realesrgan_x4_512.bin
+C:\ai-hub\real_esrgan_x4plus\models\real_esrgan_x4plus.bin
```

## Run the sample code
diff --git a/samples/python/real_esrgan_x4plus/real_esrgan_x4plus.py b/samples/python/real_esrgan_x4plus/real_esrgan_x4plus.py
index f09421d..fa02f9a 100644
--- a/samples/python/real_esrgan_x4plus/real_esrgan_x4plus.py
+++ b/samples/python/real_esrgan_x4plus/real_esrgan_x4plus.py
@@ -3,10 +3,13 @@
# SPDX-License-Identifier: BSD-3-Clause
# ---------------------------------------------------------------------
+import sys
import os
+sys.path.append(".")
+sys.path.append("..")
+import utils.install as install
import cv2
import numpy as np
-
import torch
import torchvision.transforms as transforms
from PIL import Image
@@ -16,17 +19,29 @@

####################################################################

+MODEL_ID = "m7qk01okn"
+MODEL_NAME = "real_esrgan_x4plus"
+MODEL_HELP_URL = "https://github.com/quic/ai-engine-direct-helper/tree/main/samples/python/" + MODEL_NAME + "#" + MODEL_NAME + "-qnn-models"
+IMAGE_SIZE = 512
+
+####################################################################
+
execution_ws = os.getcwd()
-qnn_dir = execution_ws + "\\qnn"
+qnn_dir = execution_ws + "\\qai_libs"
+
+if MODEL_NAME not in execution_ws:
+    execution_ws = execution_ws + "\\" + MODEL_NAME
+
+model_dir = execution_ws + "\\models"
+model_path = model_dir + "\\" + MODEL_NAME + ".bin"

-image_size = 512
image_buffer = None
realesrgan = None

def preprocess_PIL_image(image: Image) -> torch.Tensor:
    """Convert a PIL image into a pyTorch tensor with range [0, 1] and shape NCHW."""
-    transform = transforms.Compose([transforms.Resize(image_size),  # bgr image
-                                    transforms.CenterCrop(image_size),
+    transform = transforms.Compose([transforms.Resize(IMAGE_SIZE),  # bgr image
+                                    transforms.CenterCrop(IMAGE_SIZE),
                                    transforms.PILToTensor()])
    img: torch.Tensor = transform(image)  # type: ignore
    img = img.float().unsqueeze(0) / 255.0  # int 0 - 255 to float 0.0 - 1.0
@@ -47,15 +62,26 @@ def Inference(self, input_data):
        output_data = super().Inference(input_datas)[0]
        return output_data

+def model_download():
+    ret = True
+
+    desc = f"Downloading {MODEL_NAME} model... "
+    fail = f"\nFailed to download {MODEL_NAME} model. Please prepare the model according to the steps at the link below:\n{MODEL_HELP_URL}"
+    ret = install.download_qai_hubmodel(MODEL_ID, model_path, desc=desc, fail=fail)
+
+    if not ret:
+        exit()
+
def Init():
    global realesrgan

+    model_download()
+
    # Config AppBuilder environment.
    QNNConfig.Config(qnn_dir, Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for RealESRGan objects.
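    # (model_download() above returns without downloading when the .bin file
    # already exists; otherwise it fetches the model from AI Hub, and on
    # failure it prints MODEL_HELP_URL and exits.)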
-    realesrgan_model = "models\\realesrgan_x4_512.bin"
-    realesrgan = RealESRGan("realesrgan", realesrgan_model)
+    realesrgan = RealESRGan("realesrgan", model_path)

def Inference(input_image_path, output_image_path):
    global image_buffer
@@ -74,11 +100,12 @@ def Inference(input_image_path, output_image_path):
    PerfProfile.RelPerfProfileGlobal()

    output_image = torch.from_numpy(output_image)
-    output_image = output_image.reshape(3, image_size * 4, image_size * 4)
+    output_image = output_image.reshape(3, IMAGE_SIZE * 4, IMAGE_SIZE * 4)
    output_image = torch.unsqueeze(output_image, 0)
    output_image = [torch_tensor_to_PIL_image(img) for img in output_image]
    image_buffer = output_image[0]
    image_buffer.save(output_image_path)
+    image_buffer.show()

def Release():
    global realesrgan
@@ -89,7 +116,7 @@

Init()

-Inference("input.png", "output.png")
+Inference(execution_ws + "\\input.png", execution_ws + "\\output.png")

Release()
diff --git a/samples/python/riffusion/Riffusion.py b/samples/python/riffusion/Riffusion.py
index e33778f..a6def61 100644
--- a/samples/python/riffusion/Riffusion.py
+++ b/samples/python/riffusion/Riffusion.py
@@ -307,7 +307,7 @@ def model_download():
    desc = "Downloading timestep_embedding model... "
    fail = "\nFailed to download timestep_embedding model. Please prepare the timestep_embedding data according to the guide below:\n" + TIMESTEP_HTLP_URL + "\n"
-    ret = install.download_qai_hubmodel(TIMESTEP_EMBEDDING_MODEL_ID, time_embedding_model_path, desc=desc, fail=fail)
+    ret = install.download_qai_hubmodel(TIMESTEP_EMBEDDING_MODEL_ID, time_embedding_model_path, desc=desc, fail=fail, hub_id=install.HUB_ID_T)

    if not ret:
        if not os.path.exists(time_embedding_dir):  # There is no timestep_embedding data, exit process.
diff --git a/samples/python/stable_diffusion_v1_5/stable_diffusion_v1_5.py b/samples/python/stable_diffusion_v1_5/stable_diffusion_v1_5.py
index 4fbc3b4..06d2462 100644
--- a/samples/python/stable_diffusion_v1_5/stable_diffusion_v1_5.py
+++ b/samples/python/stable_diffusion_v1_5/stable_diffusion_v1_5.py
@@ -307,7 +307,7 @@ def model_download():
    desc = "Downloading timestep_embedding model... "
    fail = "\nFailed to download timestep_embedding model. Please prepare the timestep_embedding data according to the guide below:\n" + TIMESTEP_HTLP_URL + "\n"
-    ret = install.download_qai_hubmodel(TIMESTEP_EMBEDDING_MODEL_ID, time_embedding_model_path, desc=desc, fail=fail)
+    ret = install.download_qai_hubmodel(TIMESTEP_EMBEDDING_MODEL_ID, time_embedding_model_path, desc=desc, fail=fail, hub_id=install.HUB_ID_T)

    if not ret:
        if not os.path.exists(time_embedding_dir):  # There is no timestep_embedding data, exit process.
diff --git a/samples/python/stable_diffusion_v2_1/stable_diffusion_v2_1.py b/samples/python/stable_diffusion_v2_1/stable_diffusion_v2_1.py
index b846d02..846ad62 100644
--- a/samples/python/stable_diffusion_v2_1/stable_diffusion_v2_1.py
+++ b/samples/python/stable_diffusion_v2_1/stable_diffusion_v2_1.py
@@ -300,7 +300,7 @@ def model_download():
    desc = "Downloading timestep_embedding model... "
    fail = "\nFailed to download timestep_embedding model. Please prepare the timestep_embedding data according to the guide below:\n" + TIMESTEP_HTLP_URL + "\n"
-    ret = install.download_qai_hubmodel(TIMESTEP_EMBEDDING_MODEL_ID, time_embedding_model_path, desc=desc, fail=fail)
+    ret = install.download_qai_hubmodel(TIMESTEP_EMBEDDING_MODEL_ID, time_embedding_model_path, desc=desc, fail=fail, hub_id=install.HUB_ID_T)

    if not ret:
        if not os.path.exists(time_embedding_dir):  # There is no timestep_embedding data, exit process.
diff --git a/samples/python/unet_segmentation/README.md b/samples/python/unet_segmentation/README.md
index 39f6ad9..27766bb 100644
--- a/samples/python/unet_segmentation/README.md
+++ b/samples/python/unet_segmentation/README.md
@@ -9,11 +9,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\unet_segmentation\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\unet_segmentation\qnn\QnnHtp.dll
-C:\ai-hub\unet_segmentation\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\unet_segmentation\qnn\QnnSystem.dll
-C:\ai-hub\unet_segmentation\qnn\libqnnhtpv73.cat
+C:\ai-hub\unet_segmentation\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\unet_segmentation\qai_libs\QnnHtp.dll
+C:\ai-hub\unet_segmentation\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\unet_segmentation\qai_libs\QnnSystem.dll
+C:\ai-hub\unet_segmentation\qai_libs\libqnnhtpv73.cat
```

## unet_segmentation QNN models
diff --git a/samples/python/unet_segmentation/unet_segmentation.py b/samples/python/unet_segmentation/unet_segmentation.py
index 004b1a6..eae9065 100644
--- a/samples/python/unet_segmentation/unet_segmentation.py
+++ b/samples/python/unet_segmentation/unet_segmentation.py
@@ -109,7 +109,7 @@ def Init():
    global unetsegmentation

    # Config AppBuilder environment.
-    QNNConfig.Config(os.getcwd() + "\\qnn", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
+    QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for UnetSegmentation objects.
    unetsegmentation_model = "models\\unet_segmentation.bin"
diff --git a/samples/python/utils/install.py b/samples/python/utils/install.py
index ab2a066..7cf2549 100644
--- a/samples/python/utils/install.py
+++ b/samples/python/utils/install.py
@@ -30,15 +30,16 @@ TEXT_RUN_SCRIPT_AGAIN = "Then run this Python script again."
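# Two AI Hub API tokens are kept below; download_qai_hubmodel() selects one
# through its hub_id argument. HUB_ID_Q is the default, while the Stable
# Diffusion and Riffusion samples pass HUB_ID_T for their timestep_embedding
# models.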
QNN_SDK_ROOT="C:\\Qualcomm\\AIStack\\QAIRT\\"
-HUB_ID="aac24f12d047e7f558d8effe4b2fdad0f5c2c341"
+HUB_ID_T="aac24f12d047e7f558d8effe4b2fdad0f5c2c341"
+HUB_ID_Q="a916bc04400e033f60fdd73c615e5780e2ba206a"

QAI_HUB_CONFIG = os.path.join(Path.home(), ".qai_hub", "client.ini")
QAI_HUB_CONFIG_BACKUP = os.path.join(Path.home(), ".qai_hub", "client.ini.bk")

-def setup_qai_hub():
+def setup_qai_hub(hub_id):
    if os.path.isfile(QAI_HUB_CONFIG):
        shutil.copy(QAI_HUB_CONFIG, QAI_HUB_CONFIG_BACKUP)
-    run_command(f"qai-hub.exe configure --api_token {HUB_ID} > NUL", False)
+    run_command(f"qai-hub.exe configure --api_token {hub_id} > NUL", False)

def reset_qai_hub():
@@ -51,7 +52,7 @@ def is_file_exists(filepath):
        return True
    return False

-def download_qai_hubmodel(model_id, filepath, desc=None, fail=None):
+def download_qai_hubmodel(model_id, filepath, desc=None, fail=None, hub_id=HUB_ID_Q):
    ret = True

    if is_file_exists(filepath):
@@ -65,7 +66,7 @@
    else:
        print(f"Downloading {os.path.basename(filepath)}...")

-    setup_qai_hub()
+    setup_qai_hub(hub_id)

    try:
        model = qai_hub.get_model(model_id)
        model.download(filename=filepath)
@@ -279,13 +280,13 @@ def is_installed(package):
        import importlib.metadata
        import importlib.util
        dist = importlib.metadata.distribution(package)
-    except importlib.metadata.PackageNotFoundError:
+    except importlib.metadata.PackageNotFoundError as e:
+        # print(e)
        try:
            spec = importlib.util.find_spec(package)
        except ModuleNotFoundError:
-            return False
-
-        return spec is not None
+            return None
+        return None
    return dist
diff --git a/samples/python/yolov8_det/README.md b/samples/python/yolov8_det/README.md
index 55f2ab2..433bc15 100644
--- a/samples/python/yolov8_det/README.md
+++ b/samples/python/yolov8_det/README.md
@@ -9,11 +9,11 @@ https://github.com/quic/ai-engine-direct-helper/blob/main/Docs/User_Guide.md
Copy the QNN libraries from the QNN SDK to the paths below:
```
-C:\ai-hub\yolov8_det\qnn\libQnnHtpV73Skel.so
-C:\ai-hub\yolov8_det\qnn\QnnHtp.dll
-C:\ai-hub\yolov8_det\qnn\QnnHtpV73Stub.dll
-C:\ai-hub\yolov8_det\qnn\QnnSystem.dll
-C:\ai-hub\yolov8_det\qnn\libqnnhtpv73.cat
+C:\ai-hub\yolov8_det\qai_libs\libQnnHtpV73Skel.so
+C:\ai-hub\yolov8_det\qai_libs\QnnHtp.dll
+C:\ai-hub\yolov8_det\qai_libs\QnnHtpV73Stub.dll
+C:\ai-hub\yolov8_det\qai_libs\QnnSystem.dll
+C:\ai-hub\yolov8_det\qai_libs\libqnnhtpv73.cat
```

## yolov8_det QNN models
diff --git a/samples/python/yolov8_det/yolov8_det.py b/samples/python/yolov8_det/yolov8_det.py
index 8cfdb74..76131ce 100644
--- a/samples/python/yolov8_det/yolov8_det.py
+++ b/samples/python/yolov8_det/yolov8_det.py
@@ -288,7 +288,7 @@ def Init():
    global yolov8

    # Config AppBuilder environment.
-    QNNConfig.Config(os.getcwd() + "\\qnn", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
+    QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

    # Instance for YoloV8 objects.
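    # (QNNConfig.Config(lib_dir, Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
    # is the only AppBuilder setup these samples perform; Runtime.HTP selects the
    # NPU backend implemented by the QnnHtp libraries listed in the README above.)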
    yolov8_model = "models\\yolov8_det.bin"
diff --git a/src/Utils/BuildId.hpp b/src/Utils/BuildId.hpp
index 153786e..153e187 100644
--- a/src/Utils/BuildId.hpp
+++ b/src/Utils/BuildId.hpp
@@ -11,7 +11,7 @@
namespace qnn {
namespace tools {

-inline std::string getBuildId() { return std::string("v2.28.0.240827110523_99241"); }
+inline std::string getBuildId() { return std::string("v2.28.0.241029232508_102474"); }

} // namespace tools
} // namespace qnn
diff --git a/src/Utils/DataUtil.cpp b/src/Utils/DataUtil.cpp
index 659587e..4502728 100644
--- a/src/Utils/DataUtil.cpp
+++ b/src/Utils/DataUtil.cpp
@@ -15,9 +15,11 @@ #include
#include "DataUtil.hpp"
#include "Logger.hpp"
+#ifndef __hexagon__
#include "PAL/Directory.hpp"
#include "PAL/FileOp.hpp"
#include "PAL/Path.hpp"
+#endif

using namespace qnn;
using namespace qnn::tools;
@@ -186,6 +188,7 @@ datautil::StatusCode datautil::readBinaryFromFile(std::string filePath,
  return StatusCode::SUCCESS;
}

+#ifndef __hexagon__
datautil::StatusCode datautil::writeDataToFile(std::string fileDir,
                                               std::string fileName,
                                               std::vector<size_t> dims,
@@ -275,8 +278,9 @@ datautil::StatusCode datautil::writeBinaryToFile(std::string fileDir,
  os.write(reinterpret_cast<char*>(buffer), bufferSize);
  return StatusCode::SUCCESS;
}
+#endif

-
+// Enabling fp16 execution
static inline float datautil::fp16_ieee_to_fp32_value(uint16_t h) {
  const uint32_t w = (uint32_t)h << 16;
  const uint32_t sign = w & UINT32_C(0x80000000);
@@ -297,6 +301,7 @@ static inline float datautil::fp16_ieee_to_fp32_value(uint16_t h) {
  return fp32_from_bits(result);
}

+// Enabling fp16 execution
/*
 * Convert a 32-bit floating-point number in IEEE single-precision format to a 16-bit floating-point number in
 * IEEE half-precision format, in bit representation.
@@ -304,8 +309,6 @@ static inline float datautil::fp16_ieee_to_fp32_value(uint16_t h) {
 * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals)
 * floating-point operations and bitcasts between integer and floating-point variables.
 */
-
-
bool datautil::floatNToFloat32(float* out,
                               uint8_t* in,
                               size_t numElements,
@@ -338,6 +341,7 @@ bool datautil::floatNToFloat32(float* out,
  return true;
}

+// Enabling fp16 execution
static inline float datautil::fp32_from_bits(uint32_t w) {
#if defined(__OPENCL_VERSION__)
  return as_float(w);
@@ -356,6 +360,7 @@ static inline float datautil::fp32_from_bits(uint32_t w) {
#endif
}

+// Enabling fp16 execution
static inline uint32_t datautil::fp32_to_bits(float f) {
#if defined(__OPENCL_VERSION__)
  return as_uint(f);
@@ -374,6 +379,7 @@ static inline uint32_t datautil::fp32_to_bits(float f) {
#endif
}

+// Enabling fp16 execution
static inline uint16_t datautil::fp16_ieee_from_fp32_value(float f) {
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
  const float scale_to_inf = 0x1.0p+112f;
@@ -400,6 +406,7 @@ static inline uint16_t datautil::fp16_ieee_from_fp32_value(float f) {
  return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
}

+// Enabling fp16 execution
bool datautil::float32ToFloatN(uint8_t* out,
                               float* in,
                               size_t numElements,
@@ -514,6 +521,10 @@ template datautil::StatusCode datautil::castToFloat<uint32_t>(float* out,
                                                              uint32_t* in,
                                                              size_t numElements);

+template datautil::StatusCode datautil::castToFloat<uint64_t>(float* out,
+                                                              uint64_t* in,
+                                                              size_t numElements);
+
template datautil::StatusCode datautil::castToFloat<int8_t>(float* out,
                                                            int8_t* in,
                                                            size_t numElements);
@@ -526,6 +537,10 @@ template datautil::StatusCode datautil::castToFloat<int32_t>(float* out,
                                                             int32_t* in,
                                                             size_t numElements);

+template datautil::StatusCode datautil::castToFloat<int64_t>(float* out,
+                                                             int64_t* in,
+                                                             size_t numElements);
+
template <typename T_QuantType>
datautil::StatusCode datautil::castFromFloat(T_QuantType* out, float* in, size_t numElements) {
  if (nullptr == out || nullptr == in) {
@@ -550,6 +565,10 @@ template datautil::StatusCode datautil::castFromFloat<uint32_t>(uint32_t* out,
                                                                float* in,
                                                                size_t numElements);

+template datautil::StatusCode datautil::castFromFloat<uint64_t>(uint64_t* out,
+                                                                float* in,
+                                                                size_t numElements);
+
template datautil::StatusCode datautil::castFromFloat<int8_t>(int8_t* out,
                                                              float* in,
                                                              size_t numElements);
@@ -559,5 +578,9 @@ template datautil::StatusCode datautil::castFromFloat<int16_t>(int16_t* out,
                                                               size_t numElements);

template datautil::StatusCode datautil::castFromFloat<int32_t>(int32_t* out,
+                                                               float* in,
+                                                               size_t numElements);
+
+template datautil::StatusCode datautil::castFromFloat<int64_t>(int64_t* out,
                                                               float* in,
                                                               size_t numElements);
\ No newline at end of file
diff --git a/src/Utils/DataUtil.hpp b/src/Utils/DataUtil.hpp
index c030a86..7bbfd15 100644
--- a/src/Utils/DataUtil.hpp
+++ b/src/Utils/DataUtil.hpp
@@ -68,6 +68,7 @@ ReadBatchDataRetType_t readBatchData(const std::vector<std::string>& filePaths,
StatusCode readBinaryFromFile(std::string filePath, uint8_t* buffer, size_t bufferSize);

+#ifndef __hexagon__
StatusCode writeDataToFile(std::string fileDir,
                           std::string fileName,
                           std::vector<size_t> dims,
@@ -85,22 +86,15 @@ StatusCode writeBinaryToFile(std::string fileDir,
                             std::string fileName,
                             uint8_t* buffer,
                             size_t bufferSize);
+#endif

+// Enabling fp16 execution
static inline uint16_t fp16_ieee_from_fp32_value(float f);
static inline float fp16_ieee_to_fp32_value(uint16_t h);
-
static inline uint32_t fp32_to_bits(float f);
static inline float fp32_from_bits(uint32_t w);
-
-bool floatNToFloat32(float* out,
-                     uint8_t* in,
-                     size_t numElements,
-                     uint8_t bitWidth);
-
-bool float32ToFloatN(uint8_t* out,
-                     float* in,
-                     size_t numElements,
-                     uint8_t bitWidth);
+bool floatNToFloat32(float* out, uint8_t* in, size_t numElements, uint8_t bitWidth);
+bool float32ToFloatN(uint8_t* out, float* in, size_t numElements, uint8_t bitWidth);

template <typename T_QuantType>
datautil::StatusCode floatToTfN(
diff --git a/src/Utils/DynamicLoadUtil.cpp b/src/Utils/DynamicLoadUtil.cpp
index da6f122..cb04eba 100644
--- a/src/Utils/DynamicLoadUtil.cpp
+++ b/src/Utils/DynamicLoadUtil.cpp
@@ -39,13 +39,8 @@ dynamicloadutil::StatusCode dynamicloadutil::getQnnFunctionPointers(
    void** backendHandleRtn,
    bool loadModelLib,
    void** modelHandleRtn) {
-#if defined(__ANDROID__)
-  void* libBackendHandle = pal::dynamicloading::dlOpen(
-      backendPath.c_str(), pal::dynamicloading::DL_NOW | pal::dynamicloading::DL_LOCAL);
-#else
  void* libBackendHandle = pal::dynamicloading::dlOpen(
      backendPath.c_str(), pal::dynamicloading::DL_NOW | pal::dynamicloading::DL_GLOBAL);
-#endif
  if (nullptr == libBackendHandle) {
    QNN_ERROR("Unable to load backend. pal::dynamicloading::dlError(): %s",
              pal::dynamicloading::dlError());
diff --git a/src/Utils/IOTensor.cpp b/src/Utils/IOTensor.cpp
index ee9d6b2..c0ccebe 100644
--- a/src/Utils/IOTensor.cpp
+++ b/src/Utils/IOTensor.cpp
@@ -14,9 +14,11 @@ #include
#include "DataUtil.hpp"
#include "IOTensor.hpp"
#include "Logger.hpp"
+#ifndef __hexagon__
#include "PAL/Directory.hpp"
#include "PAL/FileOp.hpp"
#include "PAL/Path.hpp"
+#endif
#include "PAL/StringOp.hpp"
#include "QnnTypeMacros.hpp"

@@ -71,7 +73,7 @@ iotensor::StatusCode iotensor::IOTensor::copyFromFloatToNative(float* floatBuffer,
  fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(tensor), QNN_TENSOR_GET_RANK(tensor));

  switch (QNN_TENSOR_GET_DATA_TYPE(tensor)) {
-    case QNN_DATATYPE_FLOAT_16:
+    case QNN_DATATYPE_FLOAT_16:  // zw. Enabling fp16 execution
#ifdef __hexagon__
      QNN_ERROR("failure in aiswutility::float32ToFloatN, not supported on Hexagon");
      returnStatus = StatusCode::FAILURE;
@@ -135,6 +137,17 @@ iotensor::StatusCode iotensor::IOTensor::copyFromFloatToNative(float* floatBuffer,
      }
      break;

+    case QNN_DATATYPE_UINT_64:
+      if (datautil::StatusCode::SUCCESS !=
+          datautil::castFromFloat<uint64_t>(
+              static_cast<uint64_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data),
+              floatBuffer,
+              datautil::calculateElementCount(dims))) {
+        QNN_ERROR("failure in castFromFloat");
+        returnStatus = StatusCode::FAILURE;
+      }
+      break;
+
    case QNN_DATATYPE_INT_8:
      if (datautil::StatusCode::SUCCESS !=
          datautil::castFromFloat<int8_t>(
              static_cast<int8_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data),
@@ -168,6 +181,17 @@ iotensor::StatusCode iotensor::IOTensor::copyFromFloatToNative(float* floatBuffer,
      }
      break;

+    case QNN_DATATYPE_INT_64:
+      if (datautil::StatusCode::SUCCESS !=
+          datautil::castFromFloat<int64_t>(
+              static_cast<int64_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data),
+              floatBuffer,
+              datautil::calculateElementCount(dims))) {
+        QNN_ERROR("failure in castFromFloat");
+        returnStatus = StatusCode::FAILURE;
+      }
+      break;
+
    case QNN_DATATYPE_BOOL_8:
      if (datautil::StatusCode::SUCCESS !=
          datautil::castFromFloat<uint8_t>(
@@ -314,6 +338,7 @@ iotensor::PopulateInputTensorsRetType_t iotensor::IOTensor::populateInputTensors(
  return {StatusCode::SUCCESS, numFilesPopulated, numBatchSize};
}

+// zw. Optimize performance.
// Helper method to populate an input tensor in the graph during execution.
// It relies on reading data from buffer provided during executeGraph() call.
iotensor::StatusCode iotensor::IOTensor::populateInputTensor(
@@ -345,6 +370,7 @@ iotensor::StatusCode iotensor::IOTensor::populateInputTensor(
  return StatusCode::SUCCESS;
}

+// zw. Optimize performance.
// Helper method to populate all input tensors.
iotensor::StatusCode iotensor::IOTensor::populateInputTensors(
    uint32_t graphIdx,
@@ -374,6 +400,7 @@ iotensor::StatusCode iotensor::IOTensor::populateInputTensors(
  return StatusCode::SUCCESS;
}

+// zw. Optimize performance.
iotensor::StatusCode iotensor::IOTensor::getTensorsSize(Qnn_Tensor_t** tensors, uint32_t tensorCount, Qnn_Tensor_t* tensorWrappers, std::vector<size_t>& size) {
  if (nullptr == tensorWrappers) {
    QNN_ERROR("tensorWrappers is nullptr");
@@ -542,7 +569,7 @@ iotensor::StatusCode iotensor::IOTensor::allocateBuffer(uint8_t** buffer,
  size_t elementCount = datautil::calculateElementCount(dims);
  auto returnStatus   = StatusCode::SUCCESS;
  switch (dataType) {
-    case QNN_DATATYPE_FLOAT_16:
+    case QNN_DATATYPE_FLOAT_16:  // zw. Enabling fp16 execution
    case QNN_DATATYPE_FLOAT_32:
      QNN_DEBUG("allocating float buffer");
      returnStatus = allocateBuffer<float>(reinterpret_cast<float**>(buffer), elementCount);
      break;
@@ -565,6 +592,11 @@ iotensor::StatusCode iotensor::IOTensor::allocateBuffer(uint8_t** buffer,
      returnStatus = allocateBuffer<uint32_t>(reinterpret_cast<uint32_t**>(buffer), elementCount);
      break;

+    case QNN_DATATYPE_UINT_64:
+      QNN_DEBUG("allocating uint64_t buffer");
+      returnStatus = allocateBuffer<uint64_t>(reinterpret_cast<uint64_t**>(buffer), elementCount);
+      break;
+
    case QNN_DATATYPE_INT_8:
      QNN_DEBUG("allocating int8_t buffer");
      returnStatus = allocateBuffer<int8_t>(reinterpret_cast<int8_t**>(buffer), elementCount);
      break;
@@ -580,6 +612,11 @@ iotensor::StatusCode iotensor::IOTensor::allocateBuffer(uint8_t** buffer,
      returnStatus = allocateBuffer<int32_t>(reinterpret_cast<int32_t**>(buffer), elementCount);
      break;

+    case QNN_DATATYPE_INT_64:
+      QNN_DEBUG("allocating int64_t buffer");
+      returnStatus = allocateBuffer<int64_t>(reinterpret_cast<int64_t**>(buffer), elementCount);
+      break;
+
    case QNN_DATATYPE_BOOL_8:
      QNN_DEBUG("allocating bool buffer");
      returnStatus = allocateBuffer<uint8_t>(reinterpret_cast<uint8_t**>(buffer), elementCount);
      break;
@@ -611,6 +648,7 @@ iotensor::StatusCode iotensor::IOTensor::allocateBuffer(T** buffer, size_t& elementCount) {
// Convert data to float or de-quantization. This is used when
// user requests for float output and the model produces
// non-float output.
+#ifndef __hexagon__
iotensor::StatusCode iotensor::IOTensor::convertToFloat(float** out, Qnn_Tensor_t* tensor) {
  if (nullptr == tensor) {
    QNN_ERROR("tensors is nullptr");
@@ -630,7 +668,7 @@ iotensor::StatusCode iotensor::IOTensor::convertToFloat(float** out, Qnn_Tensor_t* tensor) {
    return returnStatus;
  }
  switch (QNN_TENSOR_GET_DATA_TYPE(tensor)) {
-    case QNN_DATATYPE_FLOAT_16:
+    case QNN_DATATYPE_FLOAT_16:  // zw. Enabling fp16 execution
      if (!datautil::floatNToFloat32(
              *out, reinterpret_cast<uint8_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), elementCount, 16)) {
        QNN_ERROR("failure in aiswutility::floatNToFloat32");
@@ -697,6 +735,17 @@ iotensor::StatusCode iotensor::IOTensor::convertToFloat(float** out, Qnn_Tensor_t* tensor) {
      }
      break;

+    case QNN_DATATYPE_UINT_64:
+      if (datautil::StatusCode::SUCCESS !=
+          datautil::castToFloat<uint64_t>(
+              *out,
+              reinterpret_cast<uint64_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data),
+              elementCount)) {
+        QNN_ERROR("failure in castToFloat");
+        returnStatus = StatusCode::FAILURE;
+      }
+      break;
+
    case QNN_DATATYPE_INT_8:
      if (datautil::StatusCode::SUCCESS !=
          datautil::castToFloat<int8_t>(
@@ -730,6 +779,17 @@ iotensor::StatusCode iotensor::IOTensor::convertToFloat(float** out, Qnn_Tensor_t* tensor) {
      }
      break;

+    case QNN_DATATYPE_INT_64:
+      if (datautil::StatusCode::SUCCESS !=
+          datautil::castToFloat<int64_t>(
+              *out,
+              reinterpret_cast<int64_t*>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data),
+              elementCount)) {
+        QNN_ERROR("failure in castToFloat");
+        returnStatus = StatusCode::FAILURE;
+      }
+      break;
+
    case QNN_DATATYPE_BOOL_8:
      if (datautil::StatusCode::SUCCESS !=
          datautil::castToFloat<uint8_t>(
@@ -888,6 +948,7 @@ iotensor::StatusCode iotensor::IOTensor::writeOutputTensors(uint32_t graphIdx,
  }
  return returnStatus;
}
+#endif

// Helper method to allocate a buffer and copy data to it.
iotensor::StatusCode iotensor::IOTensor::allocateAndCopyBuffer(uint8_t** buffer,
diff --git a/src/Utils/IOTensor.hpp b/src/Utils/IOTensor.hpp
index 868e2c3..255401b 100644
--- a/src/Utils/IOTensor.hpp
+++ b/src/Utils/IOTensor.hpp
@@ -40,6 +40,7 @@ class IOTensor {
                            Qnn_Tensor_t **outputs,
                            qnn_wrapper_api::GraphInfo_t graphInfo);

+#ifndef __hexagon__
  StatusCode writeOutputTensors(uint32_t graphIdx,
                                size_t startIdx,
                                char *graphName,
@@ -50,6 +51,7 @@ class IOTensor {
                                std::string outputPath,
                                size_t numInputFilesPopulated,
                                size_t outputBatchSize);
+#endif

  PopulateInputTensorsRetType_t populateInputTensors(
      uint32_t graphIdx,
@@ -61,6 +63,7 @@ class IOTensor {
      qnn_wrapper_api::GraphInfo_t graphInfo,
      iotensor::InputDataType inputDataType);

+  // zw. Optimize performance.
  StatusCode populateInputTensors(uint32_t graphIdx,
                                  std::vector<uint8_t *> inputBuffers,
                                  Qnn_Tensor_t *inputs,
@@ -72,11 +75,13 @@ class IOTensor {
                                       size_t numInputTensors,
                                       size_t numOutputTensors);

+#ifndef __hexagon__
  StatusCode convertToFloat(float **out, Qnn_Tensor_t *output);  // zw: change it to public function.
+#endif

  StatusCode fillDims(std::vector<uint32_t> &dims, uint32_t *inDimensions, uint32_t rank);  // zw: change it to public function.

-  StatusCode getTensorsSize(Qnn_Tensor_t** tensors, uint32_t tensorCount, Qnn_Tensor_t* tensorWrappers, std::vector<size_t>& size);
+  StatusCode getTensorsSize(Qnn_Tensor_t** tensors, uint32_t tensorCount, Qnn_Tensor_t* tensorWrappers, std::vector<size_t>& size);  // zw. Optimize performance.

 private:
  PopulateInputTensorsRetType_t populateInputTensor(const std::vector<std::string> &filePaths,
@@ -85,7 +90,7 @@ class IOTensor {
                                                    Qnn_Tensor_t *input,
                                                    InputDataType inputDataType);

-  StatusCode populateInputTensor(uint8_t *buffer, Qnn_Tensor_t *input, InputDataType inputDataType);
+  StatusCode populateInputTensor(uint8_t *buffer, Qnn_Tensor_t *input, InputDataType inputDataType);  // zw. Optimize performance.

  PopulateInputTensorsRetType_t readDataAndAllocateBuffer(const std::vector<std::string> &filePaths,
                                                          const size_t filePathsIndexOffset,
@@ -97,6 +102,7 @@ class IOTensor {
  template <typename T>
  StatusCode allocateBuffer(T **buffer, size_t &elementCount);

+#ifndef __hexagon__
  StatusCode convertAndWriteOutputTensorInFloat(Qnn_Tensor_t *output,
                                                std::vector<std::string> outputPaths,
                                                std::string fileName,
@@ -106,6 +112,7 @@ class IOTensor {
                                 std::vector<std::string> outputPaths,
                                 std::string fileName,
                                 size_t outputBatchSize);
+#endif

  StatusCode allocateAndCopyBuffer(uint8_t **buffer, Qnn_Tensor_t *tensor);
@@ -120,4 +127,4 @@ class IOTensor {
};
} // namespace iotensor
} // namespace tools
-} // namespace qnn
+} // namespace qnn
\ No newline at end of file
diff --git a/src/Utils/QnnSampleAppUtils.cpp b/src/Utils/QnnSampleAppUtils.cpp
index 55b8432..b2672d2 100644
--- a/src/Utils/QnnSampleAppUtils.cpp
+++ b/src/Utils/QnnSampleAppUtils.cpp
@@ -15,9 +15,11 @@ #include
#include "Logger.hpp"
+#ifndef __hexagon__
#include "PAL/Directory.hpp"
#include "PAL/FileOp.hpp"
#include "PAL/Path.hpp"
+#endif
#include "PAL/StringOp.hpp"
#include "QnnSampleAppUtils.hpp"
#include "QnnTypeMacros.hpp"

@@ -224,7 +226,16 @@ bool sample_app::deepCopyQnnTensorInfo(Qnn_Tensor_t *dst, const Qnn_Tensor_t *src) {
                           QNN_TENSOR_GET_DIMENSIONS(src),
                           QNN_TENSOR_GET_RANK(src) * sizeof(uint32_t));
  }
+  if (QNN_TENSOR_GET_IS_DYNAMIC_DIMENSIONS(src)) {
+    QNN_TENSOR_SET_IS_DYNAMIC_DIMENSIONS(
+        dst, (uint8_t *)malloc(QNN_TENSOR_GET_RANK(src) * sizeof(uint8_t)));
+    pal::StringOp::memscpy(QNN_TENSOR_GET_IS_DYNAMIC_DIMENSIONS(dst),
+                           QNN_TENSOR_GET_RANK(src) * sizeof(uint8_t),
+                           QNN_TENSOR_GET_IS_DYNAMIC_DIMENSIONS(src),
+                           QNN_TENSOR_GET_RANK(src) * sizeof(uint8_t));
+  }
  }
+  QNN_TENSOR_SET_SPARSE_PARAMS(dst, QNN_TENSOR_GET_SPARSE_PARAMS(src));

  return true;
}
@@ -249,41 +260,41 @@ bool sample_app::copyTensorsInfo(const Qnn_Tensor_t *tensorsInfoSrc,
  return returnStatus;
}

-bool sample_app::copyGraphsInfoV3(const QnnSystemContext_GraphInfoV3_t* graphInfoSrc,
-                                  qnn_wrapper_api::GraphInfo_t* graphInfoDst) {
-    graphInfoDst->graphName = nullptr;
-    if (graphInfoSrc->graphName) {
-      graphInfoDst->graphName =
-          pal::StringOp::strndup(graphInfoSrc->graphName, strlen(graphInfoSrc->graphName));
-    }
-    graphInfoDst->inputTensors    = nullptr;
-    graphInfoDst->numInputTensors = 0;
-    if (graphInfoSrc->graphInputs) {
-      if (!copyTensorsInfo(
+bool sample_app::copyGraphsInfoV1(const QnnSystemContext_GraphInfoV1_t *graphInfoSrc,
+                                  qnn_wrapper_api::GraphInfo_t *graphInfoDst) {
+  graphInfoDst->graphName = nullptr;
+  if (graphInfoSrc->graphName) {
+    graphInfoDst->graphName =
+        pal::StringOp::strndup(graphInfoSrc->graphName, strlen(graphInfoSrc->graphName));
+  }
+  graphInfoDst->inputTensors    = nullptr;
+  graphInfoDst->numInputTensors = 0;
+  if (graphInfoSrc->graphInputs) {
+    if (!copyTensorsInfo(
            graphInfoSrc->graphInputs, graphInfoDst->inputTensors, graphInfoSrc->numGraphInputs)) {
-        return false;
-      }
-      graphInfoDst->numInputTensors = graphInfoSrc->numGraphInputs;
+      return false;
    }
-    graphInfoDst->outputTensors    = nullptr;
-    graphInfoDst->numOutputTensors = 0;
-    if (graphInfoSrc->graphOutputs) {
-      if (!copyTensorsInfo(graphInfoSrc->graphOutputs,
-                           graphInfoDst->outputTensors,
-                           graphInfoSrc->numGraphOutputs)) {
-        return false;
-      }
-      graphInfoDst->numOutputTensors = graphInfoSrc->numGraphOutputs;
+    graphInfoDst->numInputTensors = graphInfoSrc->numGraphInputs;
+  }
+  graphInfoDst->outputTensors    = nullptr;
+  graphInfoDst->numOutputTensors = 0;
+  if (graphInfoSrc->graphOutputs) {
+    if (!copyTensorsInfo(graphInfoSrc->graphOutputs,
+                         graphInfoDst->outputTensors,
+                         graphInfoSrc->numGraphOutputs)) {
+      return false;
    }
-    return true;
+    graphInfoDst->numOutputTensors = graphInfoSrc->numGraphOutputs;
+  }
+  return true;
}

-bool sample_app::copyGraphsInfoV1(const QnnSystemContext_GraphInfoV1_t *graphInfoSrc,
+bool sample_app::copyGraphsInfoV3(const QnnSystemContext_GraphInfoV3_t *graphInfoSrc,
                                  qnn_wrapper_api::GraphInfo_t *graphInfoDst) {
  graphInfoDst->graphName = nullptr;
  if (graphInfoSrc->graphName) {
    graphInfoDst->graphName =
-      pal::StringOp::strndup(graphInfoSrc->graphName, strlen(graphInfoSrc->graphName));
+        pal::StringOp::strndup(graphInfoSrc->graphName, strlen(graphInfoSrc->graphName));
  }
  graphInfoDst->inputTensors    = nullptr;
  graphInfoDst->numInputTensors = 0;
@@ -329,9 +340,8 @@ bool sample_app::copyGraphsInfo(const QnnSystemContext_GraphInfo_t *graphsInput,
    QNN_DEBUG("Extracting graphsInfo for graph Idx: %d", gIdx);
    if (graphsInput[gIdx].version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_1) {
      copyGraphsInfoV1(&graphsInput[gIdx].graphInfoV1, &graphInfoArr[gIdx]);
-    }
-    else if (graphsInput[gIdx].version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3) {
-        copyGraphsInfoV3(&graphsInput[gIdx].graphInfoV3, &graphInfoArr[gIdx]);
+    } else if (graphsInput[gIdx].version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3) {
+      copyGraphsInfoV3(&graphsInput[gIdx].graphInfoV3, &graphInfoArr[gIdx]);
    }
    graphsInfo[gIdx] = graphInfoArr + gIdx;
  }
@@ -390,8 +400,7 @@ bool sample_app::copyMetadataToGraphsInfo(const QnnSystemContext_BinaryInfo_t *binaryInfo,
      graphsCount = binaryInfo->contextBinaryInfoV2.numGraphs;
      return true;
    }
-  }
-  else if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3) {
+  } else if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3) {
    if (binaryInfo->contextBinaryInfoV3.graphs) {
      if (!copyGraphsInfo(binaryInfo->contextBinaryInfoV3.graphs,
                          binaryInfo->contextBinaryInfoV3.numGraphs,