From ac324deb36cb9f5e7e27d44e455727d4e8ad992f Mon Sep 17 00:00:00 2001 From: ZHEQIUSHUI <46700201+ZHEQIUSHUI@users.noreply.github.com> Date: Mon, 22 Jan 2024 11:29:28 +0800 Subject: [PATCH] add v8seg (#120) --- CMakeLists.txt | 4 +- examples/ax620e/CMakeLists.txt | 1 + examples/ax620e/ax_yolov8_seg_steps.cc | 259 ++++++++++++++++++ examples/ax620e/middleware/io.hpp | 10 +- examples/ax650/CMakeLists.txt | 7 +- ...ative_steps.cc => ax_yolov8_pose_steps.cc} | 0 examples/ax650/ax_yolov8_seg_steps.cc | 259 ++++++++++++++++++ ...v8s_native_steps.cc => ax_yolov8_steps.cc} | 0 .../{ => deprecated}/ax_yolov8s_pose_steps.cc | 0 .../{ => deprecated}/ax_yolov8s_steps.cc | 0 examples/base/detection.hpp | 22 +- 11 files changed, 541 insertions(+), 21 deletions(-) create mode 100644 examples/ax620e/ax_yolov8_seg_steps.cc rename examples/ax650/{ax_yolov8s_pose_native_steps.cc => ax_yolov8_pose_steps.cc} (100%) create mode 100644 examples/ax650/ax_yolov8_seg_steps.cc rename examples/ax650/{ax_yolov8s_native_steps.cc => ax_yolov8_steps.cc} (100%) rename examples/ax650/{ => deprecated}/ax_yolov8s_pose_steps.cc (100%) rename examples/ax650/{ => deprecated}/ax_yolov8s_steps.cc (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index cd7b1f3..60f9cbb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,8 +58,8 @@ if (NOT AXERA_TARGET_CHIP) set (AXERA_TARGET_CHIP "ax620a") endif () -set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O1 -Wall -s -fPIC") -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O1 -Wall -s -fPIC") +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O1 -Wall -s -fPIC -Wunused-function") +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O1 -Wall -s -fPIC -Wunused-function") # src files add_subdirectory(examples) diff --git a/examples/ax620e/CMakeLists.txt b/examples/ax620e/CMakeLists.txt index 9c3aba9..839527d 100644 --- a/examples/ax620e/CMakeLists.txt +++ b/examples/ax620e/CMakeLists.txt @@ -29,6 +29,7 @@ axera_example(ax_yolov6 ax_yolov6_steps.cc) axera_example(ax_yolov7_tiny_face ax_yolov7_tiny_face_steps.cc) axera_example(ax_yolov7 ax_yolov7_steps.cc) axera_example(ax_yolov8 ax_yolov8s_steps.cc) +axera_example(ax_yolov8_seg ax_yolov8_seg_steps.cc) axera_example(ax_yolov8_pose ax_yolov8s_pose_steps.cc) axera_example(ax_yolox ax_yolox_steps.cc) diff --git a/examples/ax620e/ax_yolov8_seg_steps.cc b/examples/ax620e/ax_yolov8_seg_steps.cc new file mode 100644 index 0000000..aa571e0 --- /dev/null +++ b/examples/ax620e/ax_yolov8_seg_steps.cc @@ -0,0 +1,259 @@ +/* +* AXERA is pleased to support the open source community by making ax-samples available. +* +* Copyright (c) 2022, AXERA Semiconductor (Shanghai) Co., Ltd. All rights reserved. +* +* Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +* in compliance with the License. You may obtain a copy of the License at +* +* https://opensource.org/licenses/BSD-3-Clause +* +* Unless required by applicable law or agreed to in writing, software distributed +* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. +*/ + +/* +* Author: ZHEQIUSHUI +*/ + +#include +#include +#include + +#include +#include "base/common.hpp" +#include "base/detection.hpp" +#include "middleware/io.hpp" + +#include "utilities/args.hpp" +#include "utilities/cmdline.hpp" +#include "utilities/file.hpp" +#include "utilities/timer.hpp" + +#include +#include + +const int DEFAULT_IMG_H = 640; +const int DEFAULT_IMG_W = 640; + +const char* CLASS_NAMES[] = { + "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", + "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", + "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", + "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", + "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", + "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", + "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", + "hair drier", "toothbrush"}; +static const std::vector > COCO_COLORS = { + {56, 0, 255}, {226, 255, 0}, {0, 94, 255}, {0, 37, 255}, {0, 255, 94}, {255, 226, 0}, {0, 18, 255}, {255, 151, 0}, {170, 0, 255}, {0, 255, 56}, {255, 0, 75}, {0, 75, 255}, {0, 255, 169}, {255, 0, 207}, {75, 255, 0}, {207, 0, 255}, {37, 0, 255}, {0, 207, 255}, {94, 0, 255}, {0, 255, 113}, {255, 18, 0}, {255, 0, 56}, {18, 0, 255}, {0, 255, 226}, {170, 255, 0}, {255, 0, 245}, {151, 255, 0}, {132, 255, 0}, {75, 0, 255}, {151, 0, 255}, {0, 151, 255}, {132, 0, 255}, {0, 255, 245}, {255, 132, 0}, {226, 0, 255}, {255, 37, 0}, {207, 255, 0}, {0, 255, 207}, {94, 255, 0}, {0, 226, 255}, {56, 255, 0}, {255, 94, 0}, {255, 113, 0}, {0, 132, 255}, {255, 0, 132}, {255, 170, 0}, {255, 0, 188}, {113, 255, 0}, {245, 0, 255}, {113, 0, 255}, {255, 188, 0}, {0, 113, 255}, {255, 0, 0}, {0, 56, 255}, {255, 0, 113}, {0, 255, 188}, {255, 0, 94}, {255, 0, 18}, {18, 255, 0}, {0, 255, 132}, {0, 188, 255}, {0, 245, 255}, {0, 169, 255}, {37, 255, 0}, {255, 0, 151}, {188, 0, 255}, {0, 255, 37}, {0, 255, 0}, {255, 0, 170}, {255, 0, 37}, {255, 75, 0}, {0, 0, 255}, {255, 207, 0}, {255, 0, 226}, {255, 245, 0}, {188, 255, 0}, {0, 255, 18}, {0, 255, 75}, {0, 255, 151}, {255, 56, 0}, {245, 255, 0}}; +int NUM_CLASS = 80; + +const int DEFAULT_LOOP_COUNT = 1; +const int DEFAULT_MASK_PROTO_DIM = 32; +const int DEFAULT_MASK_SAMPLE_STRIDE = 4; +const float PROB_THRESHOLD = 0.45f; +const float NMS_THRESHOLD = 0.45f; +namespace ax +{ + void post_process(AX_ENGINE_IO_INFO_T* io_info, AX_ENGINE_IO_T* io_data, const cv::Mat& mat, int input_w, int input_h, const std::vector& time_costs) + { + std::vector proposals; + std::vector objects; + timer timer_postprocess; + float* output_ptr[3] = {(float*)io_data->pOutputs[0].pVirAddr, + (float*)io_data->pOutputs[1].pVirAddr, + (float*)io_data->pOutputs[2].pVirAddr}; + float* output_seg_ptr[3] = {(float*)io_data->pOutputs[3].pVirAddr, + (float*)io_data->pOutputs[4].pVirAddr, + (float*)io_data->pOutputs[5].pVirAddr}; + for (int i = 0; i < 3; ++i) + { + auto feat_ptr = output_ptr[i]; + auto feat_seg_ptr = output_seg_ptr[i]; + int32_t stride = (1 << i) * 8; + detection::generate_proposals_yolov8_seg_native(stride, feat_ptr, feat_seg_ptr, PROB_THRESHOLD, proposals, input_w, input_h, NUM_CLASS); + } + + auto mask_proto_ptr = (float*)io_data->pOutputs[6].pVirAddr; + + detection::get_out_bbox_mask(proposals, objects, mask_proto_ptr, DEFAULT_MASK_PROTO_DIM, DEFAULT_MASK_SAMPLE_STRIDE, NMS_THRESHOLD, input_h, input_w, mat.rows, mat.cols); + fprintf(stdout, "post process cost time:%.2f ms \n", timer_postprocess.cost()); + fprintf(stdout, "--------------------------------------\n"); + auto total_time = std::accumulate(time_costs.begin(), time_costs.end(), 0.f); + auto min_max_time = std::minmax_element(time_costs.begin(), time_costs.end()); + fprintf(stdout, + "Repeat %d times, avg time %.2f ms, max_time %.2f ms, min_time %.2f ms\n", + (int)time_costs.size(), + total_time / (float)time_costs.size(), + *min_max_time.second, + *min_max_time.first); + fprintf(stdout, "--------------------------------------\n"); + fprintf(stdout, "detection num: %zu\n", objects.size()); + + detection::draw_objects_mask(mat, objects, CLASS_NAMES, COCO_COLORS, "yolov8_seg_out"); + } + + bool run_model(const std::string& model, const std::vector& data, const int& repeat, cv::Mat& mat, int input_h, int input_w) + { + // 1. init engine +#ifdef AXERA_TARGET_CHIP_AX620E + auto ret = AX_ENGINE_Init(); +#else + AX_ENGINE_NPU_ATTR_T npu_attr; + memset(&npu_attr, 0, sizeof(npu_attr)); + npu_attr.eHardMode = AX_ENGINE_VIRTUAL_NPU_DISABLE; + auto ret = AX_ENGINE_Init(&npu_attr); +#endif + if (0 != ret) + { + return ret; + } + + // 2. load model + std::vector model_buffer; + if (!utilities::read_file(model, model_buffer)) + { + fprintf(stderr, "Read Run-Joint model(%s) file failed.\n", model.c_str()); + return false; + } + + // 3. create handle + AX_ENGINE_HANDLE handle; + ret = AX_ENGINE_CreateHandle(&handle, model_buffer.data(), model_buffer.size()); + SAMPLE_AX_ENGINE_DEAL_HANDLE + fprintf(stdout, "Engine creating handle is done.\n"); + + // 4. create context + ret = AX_ENGINE_CreateContext(handle); + SAMPLE_AX_ENGINE_DEAL_HANDLE + fprintf(stdout, "Engine creating context is done.\n"); + + // 5. set io + AX_ENGINE_IO_INFO_T* io_info; + ret = AX_ENGINE_GetIOInfo(handle, &io_info); + SAMPLE_AX_ENGINE_DEAL_HANDLE + fprintf(stdout, "Engine get io info is done. \n"); + + // 6. alloc io + AX_ENGINE_IO_T io_data; + ret = middleware::prepare_io(io_info, &io_data, std::make_pair(AX_ENGINE_ABST_DEFAULT, AX_ENGINE_ABST_CACHED)); + SAMPLE_AX_ENGINE_DEAL_HANDLE + fprintf(stdout, "Engine alloc io is done. \n"); + + // 7. insert input + ret = middleware::push_input(data, &io_data, io_info); + SAMPLE_AX_ENGINE_DEAL_HANDLE_IO + fprintf(stdout, "Engine push input is done. \n"); + fprintf(stdout, "--------------------------------------\n"); + + // 8. warn up + for (int i = 0; i < 5; ++i) + { + AX_ENGINE_RunSync(handle, &io_data); + } + + // 9. run model + std::vector time_costs(repeat, 0); + for (int i = 0; i < repeat; ++i) + { + timer tick; + ret = AX_ENGINE_RunSync(handle, &io_data); + time_costs[i] = tick.cost(); + SAMPLE_AX_ENGINE_DEAL_HANDLE_IO + } + + // 10. get result + post_process(io_info, &io_data, mat, input_w, input_h, time_costs); + fprintf(stdout, "--------------------------------------\n"); + + middleware::free_io(&io_data); + return AX_ENGINE_DestroyHandle(handle); + } +} // namespace ax + +int main(int argc, char* argv[]) +{ + cmdline::parser cmd; + cmd.add("model", 'm', "joint file(a.k.a. joint model)", true, ""); + cmd.add("image", 'i', "image file", true, ""); + cmd.add("size", 'g', "input_h, input_w", false, std::to_string(DEFAULT_IMG_H) + "," + std::to_string(DEFAULT_IMG_W)); + + cmd.add("repeat", 'r', "repeat count", false, DEFAULT_LOOP_COUNT); + cmd.parse_check(argc, argv); + + // 0. get app args, can be removed from user's app + auto model_file = cmd.get("model"); + auto image_file = cmd.get("image"); + + auto model_file_flag = utilities::file_exist(model_file); + auto image_file_flag = utilities::file_exist(image_file); + + if (!model_file_flag | !image_file_flag) + { + auto show_error = [](const std::string& kind, const std::string& value) { + fprintf(stderr, "Input file %s(%s) is not exist, please check it.\n", kind.c_str(), value.c_str()); + }; + + if (!model_file_flag) { show_error("model", model_file); } + if (!image_file_flag) { show_error("image", image_file); } + + return -1; + } + + auto input_size_string = cmd.get("size"); + + std::array input_size = {DEFAULT_IMG_H, DEFAULT_IMG_W}; + + auto input_size_flag = utilities::parse_string(input_size_string, input_size); + + if (!input_size_flag) + { + auto show_error = [](const std::string& kind, const std::string& value) { + fprintf(stderr, "Input %s(%s) is not allowed, please check it.\n", kind.c_str(), value.c_str()); + }; + + show_error("size", input_size_string); + + return -1; + } + + auto repeat = cmd.get("repeat"); + + // 1. print args + fprintf(stdout, "--------------------------------------\n"); + fprintf(stdout, "model file : %s\n", model_file.c_str()); + fprintf(stdout, "image file : %s\n", image_file.c_str()); + fprintf(stdout, "img_h, img_w : %d %d\n", input_size[0], input_size[1]); + fprintf(stdout, "--------------------------------------\n"); + + // 2. read image & resize & transpose + std::vector image(input_size[0] * input_size[1] * 3, 0); + cv::Mat mat = cv::imread(image_file); + if (mat.empty()) + { + fprintf(stderr, "Read image failed.\n"); + return -1; + } + common::get_input_data_letterbox(mat, image, input_size[0], input_size[1]); + + // 3. sys_init + AX_SYS_Init(); + + // 4. - engine model - can only use AX_ENGINE** inside + { + // AX_ENGINE_NPUReset(); // todo ?? + ax::run_model(model_file, image, repeat, mat, input_size[0], input_size[1]); + + // 4.3 engine de init + AX_ENGINE_Deinit(); + // AX_ENGINE_NPUReset(); + } + // 4. - engine model - + + AX_SYS_Deinit(); + return 0; +} diff --git a/examples/ax620e/middleware/io.hpp b/examples/ax620e/middleware/io.hpp index daafae1..fa6c4af 100644 --- a/examples/ax620e/middleware/io.hpp +++ b/examples/ax620e/middleware/io.hpp @@ -58,7 +58,7 @@ namespace middleware void free_io_index(AX_ENGINE_IO_BUFFER_T* io_buf, size_t index) { - for (int i = 0; i < index; ++i) + for (int i = 0; i < (int)index; ++i) { AX_ENGINE_IO_BUFFER_T* pBuf = io_buf + i; AX_SYS_MemFree(pBuf->phyAddr, pBuf->pVirAddr); @@ -89,7 +89,7 @@ namespace middleware io_data->nInputSize = info->nInputSize; auto ret = 0; - for (int i = 0; i < info->nInputSize; ++i) + for (int i = 0; i < (int)info->nInputSize; ++i) { auto meta = info->pInputs[i]; auto buffer = &io_data->pInputs[i]; @@ -114,7 +114,7 @@ namespace middleware io_data->pOutputs = new AX_ENGINE_IO_BUFFER_T[info->nOutputSize]; memset(io_data->pOutputs, 0, sizeof(AX_ENGINE_IO_BUFFER_T) * info->nOutputSize); io_data->nOutputSize = info->nOutputSize; - for (int i = 0; i < info->nOutputSize; ++i) + for (int i = 0; i < (int)info->nOutputSize; ++i) { auto meta = info->pOutputs[i]; auto buffer = &io_data->pOutputs[i]; @@ -185,7 +185,7 @@ namespace middleware for (size_t s = 0; s < info.nShapeSize; s++) { printf("%d", info.pShape[s]); - if (s != info.nShapeSize - 1) + if (s != (int)info.nShapeSize - 1) { printf(" x "); } @@ -202,7 +202,7 @@ namespace middleware for (size_t s = 0; s < info.nShapeSize; s++) { printf("%d", info.pShape[s]); - if (s != info.nShapeSize - 1) + if (s != (int)info.nShapeSize - 1) { printf(" x "); } diff --git a/examples/ax650/CMakeLists.txt b/examples/ax650/CMakeLists.txt index 7de0e84..6a66b79 100644 --- a/examples/ax650/CMakeLists.txt +++ b/examples/ax650/CMakeLists.txt @@ -28,13 +28,12 @@ axera_example(ax_yolov5_face ax_yolov5_face_steps.cc) axera_example(ax_yolov6 ax_yolov6_steps.cc) axera_example(ax_yolov7_tiny_face ax_yolov7_tiny_face_steps.cc) axera_example(ax_yolov7 ax_yolov7_steps.cc) -axera_example(ax_yolov8 ax_yolov8s_steps.cc) -axera_example(ax_yolov8_pose ax_yolov8s_pose_steps.cc) +axera_example(ax_yolov8 ax_yolov8_steps.cc) +axera_example(ax_yolov8_seg ax_yolov8_seg_steps.cc) +axera_example(ax_yolov8_pose ax_yolov8_pose_steps.cc) axera_example(ax_yolox ax_yolox_steps.cc) axera_example(ax_yolo_nas ax_yolo_nas_steps.cc) -axera_example(ax_yolov8_native ax_yolov8s_native_steps.cc) -axera_example(ax_yolov8_pose_native ax_yolov8s_pose_native_steps.cc) axera_example(ax_ppyoloe ax_ppyoloe_steps.cc) axera_example(ax_ppyoloe_obj365 ax_ppyoloe_obj365_steps.cc) diff --git a/examples/ax650/ax_yolov8s_pose_native_steps.cc b/examples/ax650/ax_yolov8_pose_steps.cc similarity index 100% rename from examples/ax650/ax_yolov8s_pose_native_steps.cc rename to examples/ax650/ax_yolov8_pose_steps.cc diff --git a/examples/ax650/ax_yolov8_seg_steps.cc b/examples/ax650/ax_yolov8_seg_steps.cc new file mode 100644 index 0000000..aa571e0 --- /dev/null +++ b/examples/ax650/ax_yolov8_seg_steps.cc @@ -0,0 +1,259 @@ +/* +* AXERA is pleased to support the open source community by making ax-samples available. +* +* Copyright (c) 2022, AXERA Semiconductor (Shanghai) Co., Ltd. All rights reserved. +* +* Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +* in compliance with the License. You may obtain a copy of the License at +* +* https://opensource.org/licenses/BSD-3-Clause +* +* Unless required by applicable law or agreed to in writing, software distributed +* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. +*/ + +/* +* Author: ZHEQIUSHUI +*/ + +#include +#include +#include + +#include +#include "base/common.hpp" +#include "base/detection.hpp" +#include "middleware/io.hpp" + +#include "utilities/args.hpp" +#include "utilities/cmdline.hpp" +#include "utilities/file.hpp" +#include "utilities/timer.hpp" + +#include +#include + +const int DEFAULT_IMG_H = 640; +const int DEFAULT_IMG_W = 640; + +const char* CLASS_NAMES[] = { + "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", + "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", + "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", + "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", + "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", + "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", + "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", + "hair drier", "toothbrush"}; +static const std::vector > COCO_COLORS = { + {56, 0, 255}, {226, 255, 0}, {0, 94, 255}, {0, 37, 255}, {0, 255, 94}, {255, 226, 0}, {0, 18, 255}, {255, 151, 0}, {170, 0, 255}, {0, 255, 56}, {255, 0, 75}, {0, 75, 255}, {0, 255, 169}, {255, 0, 207}, {75, 255, 0}, {207, 0, 255}, {37, 0, 255}, {0, 207, 255}, {94, 0, 255}, {0, 255, 113}, {255, 18, 0}, {255, 0, 56}, {18, 0, 255}, {0, 255, 226}, {170, 255, 0}, {255, 0, 245}, {151, 255, 0}, {132, 255, 0}, {75, 0, 255}, {151, 0, 255}, {0, 151, 255}, {132, 0, 255}, {0, 255, 245}, {255, 132, 0}, {226, 0, 255}, {255, 37, 0}, {207, 255, 0}, {0, 255, 207}, {94, 255, 0}, {0, 226, 255}, {56, 255, 0}, {255, 94, 0}, {255, 113, 0}, {0, 132, 255}, {255, 0, 132}, {255, 170, 0}, {255, 0, 188}, {113, 255, 0}, {245, 0, 255}, {113, 0, 255}, {255, 188, 0}, {0, 113, 255}, {255, 0, 0}, {0, 56, 255}, {255, 0, 113}, {0, 255, 188}, {255, 0, 94}, {255, 0, 18}, {18, 255, 0}, {0, 255, 132}, {0, 188, 255}, {0, 245, 255}, {0, 169, 255}, {37, 255, 0}, {255, 0, 151}, {188, 0, 255}, {0, 255, 37}, {0, 255, 0}, {255, 0, 170}, {255, 0, 37}, {255, 75, 0}, {0, 0, 255}, {255, 207, 0}, {255, 0, 226}, {255, 245, 0}, {188, 255, 0}, {0, 255, 18}, {0, 255, 75}, {0, 255, 151}, {255, 56, 0}, {245, 255, 0}}; +int NUM_CLASS = 80; + +const int DEFAULT_LOOP_COUNT = 1; +const int DEFAULT_MASK_PROTO_DIM = 32; +const int DEFAULT_MASK_SAMPLE_STRIDE = 4; +const float PROB_THRESHOLD = 0.45f; +const float NMS_THRESHOLD = 0.45f; +namespace ax +{ + void post_process(AX_ENGINE_IO_INFO_T* io_info, AX_ENGINE_IO_T* io_data, const cv::Mat& mat, int input_w, int input_h, const std::vector& time_costs) + { + std::vector proposals; + std::vector objects; + timer timer_postprocess; + float* output_ptr[3] = {(float*)io_data->pOutputs[0].pVirAddr, + (float*)io_data->pOutputs[1].pVirAddr, + (float*)io_data->pOutputs[2].pVirAddr}; + float* output_seg_ptr[3] = {(float*)io_data->pOutputs[3].pVirAddr, + (float*)io_data->pOutputs[4].pVirAddr, + (float*)io_data->pOutputs[5].pVirAddr}; + for (int i = 0; i < 3; ++i) + { + auto feat_ptr = output_ptr[i]; + auto feat_seg_ptr = output_seg_ptr[i]; + int32_t stride = (1 << i) * 8; + detection::generate_proposals_yolov8_seg_native(stride, feat_ptr, feat_seg_ptr, PROB_THRESHOLD, proposals, input_w, input_h, NUM_CLASS); + } + + auto mask_proto_ptr = (float*)io_data->pOutputs[6].pVirAddr; + + detection::get_out_bbox_mask(proposals, objects, mask_proto_ptr, DEFAULT_MASK_PROTO_DIM, DEFAULT_MASK_SAMPLE_STRIDE, NMS_THRESHOLD, input_h, input_w, mat.rows, mat.cols); + fprintf(stdout, "post process cost time:%.2f ms \n", timer_postprocess.cost()); + fprintf(stdout, "--------------------------------------\n"); + auto total_time = std::accumulate(time_costs.begin(), time_costs.end(), 0.f); + auto min_max_time = std::minmax_element(time_costs.begin(), time_costs.end()); + fprintf(stdout, + "Repeat %d times, avg time %.2f ms, max_time %.2f ms, min_time %.2f ms\n", + (int)time_costs.size(), + total_time / (float)time_costs.size(), + *min_max_time.second, + *min_max_time.first); + fprintf(stdout, "--------------------------------------\n"); + fprintf(stdout, "detection num: %zu\n", objects.size()); + + detection::draw_objects_mask(mat, objects, CLASS_NAMES, COCO_COLORS, "yolov8_seg_out"); + } + + bool run_model(const std::string& model, const std::vector& data, const int& repeat, cv::Mat& mat, int input_h, int input_w) + { + // 1. init engine +#ifdef AXERA_TARGET_CHIP_AX620E + auto ret = AX_ENGINE_Init(); +#else + AX_ENGINE_NPU_ATTR_T npu_attr; + memset(&npu_attr, 0, sizeof(npu_attr)); + npu_attr.eHardMode = AX_ENGINE_VIRTUAL_NPU_DISABLE; + auto ret = AX_ENGINE_Init(&npu_attr); +#endif + if (0 != ret) + { + return ret; + } + + // 2. load model + std::vector model_buffer; + if (!utilities::read_file(model, model_buffer)) + { + fprintf(stderr, "Read Run-Joint model(%s) file failed.\n", model.c_str()); + return false; + } + + // 3. create handle + AX_ENGINE_HANDLE handle; + ret = AX_ENGINE_CreateHandle(&handle, model_buffer.data(), model_buffer.size()); + SAMPLE_AX_ENGINE_DEAL_HANDLE + fprintf(stdout, "Engine creating handle is done.\n"); + + // 4. create context + ret = AX_ENGINE_CreateContext(handle); + SAMPLE_AX_ENGINE_DEAL_HANDLE + fprintf(stdout, "Engine creating context is done.\n"); + + // 5. set io + AX_ENGINE_IO_INFO_T* io_info; + ret = AX_ENGINE_GetIOInfo(handle, &io_info); + SAMPLE_AX_ENGINE_DEAL_HANDLE + fprintf(stdout, "Engine get io info is done. \n"); + + // 6. alloc io + AX_ENGINE_IO_T io_data; + ret = middleware::prepare_io(io_info, &io_data, std::make_pair(AX_ENGINE_ABST_DEFAULT, AX_ENGINE_ABST_CACHED)); + SAMPLE_AX_ENGINE_DEAL_HANDLE + fprintf(stdout, "Engine alloc io is done. \n"); + + // 7. insert input + ret = middleware::push_input(data, &io_data, io_info); + SAMPLE_AX_ENGINE_DEAL_HANDLE_IO + fprintf(stdout, "Engine push input is done. \n"); + fprintf(stdout, "--------------------------------------\n"); + + // 8. warn up + for (int i = 0; i < 5; ++i) + { + AX_ENGINE_RunSync(handle, &io_data); + } + + // 9. run model + std::vector time_costs(repeat, 0); + for (int i = 0; i < repeat; ++i) + { + timer tick; + ret = AX_ENGINE_RunSync(handle, &io_data); + time_costs[i] = tick.cost(); + SAMPLE_AX_ENGINE_DEAL_HANDLE_IO + } + + // 10. get result + post_process(io_info, &io_data, mat, input_w, input_h, time_costs); + fprintf(stdout, "--------------------------------------\n"); + + middleware::free_io(&io_data); + return AX_ENGINE_DestroyHandle(handle); + } +} // namespace ax + +int main(int argc, char* argv[]) +{ + cmdline::parser cmd; + cmd.add("model", 'm', "joint file(a.k.a. joint model)", true, ""); + cmd.add("image", 'i', "image file", true, ""); + cmd.add("size", 'g', "input_h, input_w", false, std::to_string(DEFAULT_IMG_H) + "," + std::to_string(DEFAULT_IMG_W)); + + cmd.add("repeat", 'r', "repeat count", false, DEFAULT_LOOP_COUNT); + cmd.parse_check(argc, argv); + + // 0. get app args, can be removed from user's app + auto model_file = cmd.get("model"); + auto image_file = cmd.get("image"); + + auto model_file_flag = utilities::file_exist(model_file); + auto image_file_flag = utilities::file_exist(image_file); + + if (!model_file_flag | !image_file_flag) + { + auto show_error = [](const std::string& kind, const std::string& value) { + fprintf(stderr, "Input file %s(%s) is not exist, please check it.\n", kind.c_str(), value.c_str()); + }; + + if (!model_file_flag) { show_error("model", model_file); } + if (!image_file_flag) { show_error("image", image_file); } + + return -1; + } + + auto input_size_string = cmd.get("size"); + + std::array input_size = {DEFAULT_IMG_H, DEFAULT_IMG_W}; + + auto input_size_flag = utilities::parse_string(input_size_string, input_size); + + if (!input_size_flag) + { + auto show_error = [](const std::string& kind, const std::string& value) { + fprintf(stderr, "Input %s(%s) is not allowed, please check it.\n", kind.c_str(), value.c_str()); + }; + + show_error("size", input_size_string); + + return -1; + } + + auto repeat = cmd.get("repeat"); + + // 1. print args + fprintf(stdout, "--------------------------------------\n"); + fprintf(stdout, "model file : %s\n", model_file.c_str()); + fprintf(stdout, "image file : %s\n", image_file.c_str()); + fprintf(stdout, "img_h, img_w : %d %d\n", input_size[0], input_size[1]); + fprintf(stdout, "--------------------------------------\n"); + + // 2. read image & resize & transpose + std::vector image(input_size[0] * input_size[1] * 3, 0); + cv::Mat mat = cv::imread(image_file); + if (mat.empty()) + { + fprintf(stderr, "Read image failed.\n"); + return -1; + } + common::get_input_data_letterbox(mat, image, input_size[0], input_size[1]); + + // 3. sys_init + AX_SYS_Init(); + + // 4. - engine model - can only use AX_ENGINE** inside + { + // AX_ENGINE_NPUReset(); // todo ?? + ax::run_model(model_file, image, repeat, mat, input_size[0], input_size[1]); + + // 4.3 engine de init + AX_ENGINE_Deinit(); + // AX_ENGINE_NPUReset(); + } + // 4. - engine model - + + AX_SYS_Deinit(); + return 0; +} diff --git a/examples/ax650/ax_yolov8s_native_steps.cc b/examples/ax650/ax_yolov8_steps.cc similarity index 100% rename from examples/ax650/ax_yolov8s_native_steps.cc rename to examples/ax650/ax_yolov8_steps.cc diff --git a/examples/ax650/ax_yolov8s_pose_steps.cc b/examples/ax650/deprecated/ax_yolov8s_pose_steps.cc similarity index 100% rename from examples/ax650/ax_yolov8s_pose_steps.cc rename to examples/ax650/deprecated/ax_yolov8s_pose_steps.cc diff --git a/examples/ax650/ax_yolov8s_steps.cc b/examples/ax650/deprecated/ax_yolov8s_steps.cc similarity index 100% rename from examples/ax650/ax_yolov8s_steps.cc rename to examples/ax650/deprecated/ax_yolov8s_steps.cc diff --git a/examples/base/detection.hpp b/examples/base/detection.hpp index 05ff11c..72cae5a 100644 --- a/examples/base/detection.hpp +++ b/examples/base/detection.hpp @@ -1312,7 +1312,7 @@ namespace detection } } - static void generate_proposals_yolov8_native(int stride, const float *feat, float prob_threshold, std::vector &objects, + static void generate_proposals_yolov8_native(int stride, const float* feat, float prob_threshold, std::vector& objects, int letterbox_cols, int letterbox_rows, int cls_num = 80) { int feat_w = letterbox_cols / stride; @@ -1378,7 +1378,7 @@ namespace detection } } - static void generate_proposals_yolov8_seg_native(int stride, const float *feat, float prob_threshold, std::vector &objects, + static void generate_proposals_yolov8_seg_native(int stride, const float* feat, const float* feat_seg, float prob_threshold, std::vector& objects, int letterbox_cols, int letterbox_rows, int cls_num = 80, int mask_proto_dim = 32) { int feat_w = letterbox_cols / stride; @@ -1386,6 +1386,7 @@ namespace detection int reg_max = 16; auto feat_ptr = feat; + auto feat_seg_ptr = feat_seg; std::vector dis_after_sm(reg_max, 0.f); for (int h = 0; h <= feat_h - 1; h++) @@ -1395,7 +1396,7 @@ namespace detection // process cls score int class_index = 0; float class_score = -FLT_MAX; - for (int s = 0; s < cls_num ; s++) + for (int s = 0; s < cls_num; s++) { float score = feat_ptr[s + 4 * reg_max]; if (score > class_score) @@ -1436,19 +1437,21 @@ namespace detection obj.label = class_index; obj.prob = box_prob; obj.mask_feat.resize(mask_proto_dim); - for (int k = 0; k < mask_proto_dim; k++) - { - obj.mask_feat[k] = feat_ptr[cls_num + 4 * reg_max + k]; - } + memcpy(obj.mask_feat.data(), feat_seg_ptr, sizeof(float) * mask_proto_dim); + // for (int k = 0; k < mask_proto_dim; k++) + // { + // obj.mask_feat[k] = feat_seg_ptr[k]; + // } objects.push_back(obj); } - feat_ptr += (cls_num + 4 * reg_max + mask_proto_dim); + feat_ptr += cls_num + 4 * reg_max; + feat_seg_ptr += mask_proto_dim; } } } - static void generate_proposals_yolov8_pose_native(int stride, const float *feat, const float *feat_kps, float prob_threshold, std::vector &objects, + static void generate_proposals_yolov8_pose_native(int stride, const float* feat, const float* feat_kps, float prob_threshold, std::vector& objects, int letterbox_cols, int letterbox_rows, const int num_point = 17, int cls_num = 1) { int feat_w = letterbox_cols / stride; @@ -1523,7 +1526,6 @@ namespace detection } } - static void generate_proposals(int stride, const float* feat, float prob_threshold, std::vector& objects, int letterbox_cols, int letterbox_rows, const float* anchors, int cls_num = 80) {