From 216166cbb841db85b155caf4eaee6f194f9ed326 Mon Sep 17 00:00:00 2001 From: quic-zhanweiw Date: Wed, 4 Dec 2024 16:57:53 +0800 Subject: [PATCH] Improve inference performance --- src/LibAppBuilder.cpp | 12 ++++++ src/QnnSampleApp.cpp | 57 +++++++++++++++++++++------- src/QnnSampleApp.hpp | 3 ++ src/WrapperUtils/QnnWrapperUtils.hpp | 4 ++ 4 files changed, 63 insertions(+), 13 deletions(-) diff --git a/src/LibAppBuilder.cpp b/src/LibAppBuilder.cpp index 3b3766d..76a8928 100644 --- a/src/LibAppBuilder.cpp +++ b/src/LibAppBuilder.cpp @@ -377,6 +377,12 @@ bool ModelInitializeEx(const std::string& model_name, const std::string& proc_na } } + // improve performance. + if (sample_app::StatusCode::SUCCESS != app->setupInputAndOutputTensors()) { + app->reportError("Setup Input and Output Tensors failure"); + return false; + } + if (loadFromCachedBinary) { if (sample_app::StatusCode::SUCCESS != app->initializePerformance()) { app->reportError("Performance initialization failure"); @@ -452,6 +458,12 @@ bool ModelDestroyEx(std::string model_name, std::string proc_name) { return false; } + // improve performance. + if (sample_app::StatusCode::SUCCESS != app->tearDownInputAndOutputTensors()) { + app->reportError("Input and Output Tensors destroy failure"); + return false; + } + if (sample_app::StatusCode::SUCCESS != app->destroyPerformance()) { app->reportError("Performance destroy failure"); return false; diff --git a/src/QnnSampleApp.cpp b/src/QnnSampleApp.cpp index b1ea1ef..7b90659 100644 --- a/src/QnnSampleApp.cpp +++ b/src/QnnSampleApp.cpp @@ -843,6 +843,46 @@ void bufferToFile(std::vector& buffers, std::vector& size, std } #endif +// improve performance. 
+sample_app::StatusCode sample_app::QnnSampleApp::setupInputAndOutputTensors() +{ + auto returnStatus = qnn::tools::iotensor::StatusCode::SUCCESS; + + for (size_t graphIdx = 0; graphIdx < m_graphsCount; graphIdx++) { + auto& graphInfo = (*m_graphsInfo)[graphIdx]; + Qnn_Tensor_t** inputs = &(graphInfo.m_inputs ); + Qnn_Tensor_t** outputs = &(graphInfo.m_outputs); + returnStatus = m_ioTensor.setupInputAndOutputTensors(inputs, outputs, graphInfo); + if (qnn::tools::iotensor::StatusCode::SUCCESS != returnStatus) { + QNN_ERROR("Error in setting up Input and output Tensors for graphIdx: %d", graphIdx); + break; + } + } + + return static_cast<StatusCode>(returnStatus); +} + +// improve performance. +sample_app::StatusCode sample_app::QnnSampleApp::tearDownInputAndOutputTensors() +{ + auto returnStatus = qnn::tools::iotensor::StatusCode::SUCCESS; + + for (size_t graphIdx = 0; graphIdx < m_graphsCount; graphIdx++) { + auto& graphInfo = (*m_graphsInfo)[graphIdx]; + Qnn_Tensor_t* inputs = graphInfo.m_inputs ; + Qnn_Tensor_t* outputs = graphInfo.m_outputs; + returnStatus = m_ioTensor.tearDownInputAndOutputTensors(inputs, outputs, graphInfo.numInputTensors, graphInfo.numOutputTensors); + graphInfo.m_inputs = nullptr; + graphInfo.m_outputs = nullptr; + if (qnn::tools::iotensor::StatusCode::SUCCESS != returnStatus) { + QNN_ERROR("Error in tear down Input and output Tensors for graphIdx: %d", graphIdx); + break; + } + } + + return static_cast<StatusCode>(returnStatus); +} + sample_app::StatusCode sample_app::QnnSampleApp::executeGraphsBuffers(std::vector& inputBuffers, std::vector& outputBuffers, std::vector& outputSize, std::string perfProfile) { @@ -870,13 +910,10 @@ sample_app::StatusCode sample_app::QnnSampleApp::executeGraphsBuffers(std::vecto break; } - Qnn_Tensor_t* inputs = nullptr; - Qnn_Tensor_t* outputs = nullptr; - if (iotensor::StatusCode::SUCCESS != m_ioTensor.setupInputAndOutputTensors(&inputs, &outputs, (*m_graphsInfo)[graphIdx])) { - QNN_ERROR("Error in setting up Input and output 
Tensors for graphIdx: %d", graphIdx); - returnStatus = StatusCode::FAILURE; - break; - } + // improve performance. + + Qnn_Tensor_t* inputs = (*m_graphsInfo)[graphIdx].m_inputs ; + Qnn_Tensor_t* outputs = (*m_graphsInfo)[graphIdx].m_outputs; auto graphInfo = (*m_graphsInfo)[graphIdx]; if (!inputBuffers.empty()) { @@ -994,12 +1031,6 @@ sample_app::StatusCode sample_app::QnnSampleApp::executeGraphsBuffers(std::vecto } } } - m_ioTensor.tearDownInputAndOutputTensors(inputs, outputs, graphInfo.numInputTensors, graphInfo.numOutputTensors); - inputs = nullptr; - outputs = nullptr; - if (StatusCode::SUCCESS != returnStatus) { - break; - } } return returnStatus; diff --git a/src/QnnSampleApp.hpp b/src/QnnSampleApp.hpp index 959a84b..e349599 100644 --- a/src/QnnSampleApp.hpp +++ b/src/QnnSampleApp.hpp @@ -93,6 +93,9 @@ class QnnSampleApp { StatusCode verifyFailReturnStatus(Qnn_ErrorHandle_t errCode); +// improve performance. + StatusCode setupInputAndOutputTensors(); + StatusCode tearDownInputAndOutputTensors(); // zw. StatusCode executeGraphsBuffers(std::vector& inputBuffers, diff --git a/src/WrapperUtils/QnnWrapperUtils.hpp b/src/WrapperUtils/QnnWrapperUtils.hpp index 8702a92..eb639a0 100644 --- a/src/WrapperUtils/QnnWrapperUtils.hpp +++ b/src/WrapperUtils/QnnWrapperUtils.hpp @@ -122,6 +122,10 @@ typedef struct GraphInfo { uint32_t numInputTensors; Qnn_Tensor_t *outputTensors; uint32_t numOutputTensors; + + // improve performance. + Qnn_Tensor_t* m_inputs; + Qnn_Tensor_t* m_outputs; } GraphInfo_t; typedef GraphInfo_t *GraphInfoPtr_t;