pre-alloc remote memory; rebase from vpux/2021/3
luo-cheng2021 committed Apr 15, 2021
1 parent 6f860dd commit 4cdb4bb
Showing 9 changed files with 242 additions and 3 deletions.
18 changes: 16 additions & 2 deletions inference-engine/samples/benchmark_app/CMakeLists.txt
@@ -5,9 +5,23 @@
file (GLOB SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
file (GLOB HDR ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)

find_package(HddlUnite QUIET)
if(HddlUnite_FOUND)
# check for the kmb_plugin include directory
if(EXISTS ${KMBPLUGIN_HOME}/include/)
set(HDDL2_PARAMS_DIR ${KMBPLUGIN_HOME}/include/)
add_definitions(-DUSE_REMOTE_MEM)
set(HDDL2_DEP "HddlUnite::HddlUnite")
else()
message(WARNING "kmb_plugin/include could not be found. The remote device memory preallocation feature is disabled.")
remove_definitions(-DUSE_REMOTE_MEM)
endif()
endif()

ie_add_sample(NAME benchmark_app
SOURCES ${SRC}
HEADERS ${HDR}
DEPENDENCIES format_reader
OPENCV_DEPENDENCIES imgcodecs)
DEPENDENCIES format_reader ${HDDL2_DEP}
OPENCV_DEPENDENCIES imgcodecs
INCLUDE_DIRECTORIES ${HDDL2_PARAMS_DIR})
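Note that KMBPLUGIN_HOME is consumed above but never set in this diff, so it is presumably supplied when configuring the samples. A minimal sketch of such a configure step, with a hypothetical checkout path:

    cmake -DKMBPLUGIN_HOME=/opt/kmb_plugin <path-to-samples-source>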

1 change: 1 addition & 0 deletions inference-engine/samples/benchmark_app/README.md
@@ -93,6 +93,7 @@ Options:
-progress Optional. Show progress bar (can affect performance measurement). Default value is "false".
-shape Optional. Set shape for input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size.
-layout Optional. Prompts how network layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one input size.
-use_remote_mem Optional. Preallocate remote memory on the device to execute infer requests.
CPU-specific performance options:
-nstreams "<integer>" Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices
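With the new flag, the sample would be invoked along these lines (hypothetical model path; a VPUX device name is required, as enforced in main.cpp below):

    ./benchmark_app -m model.blob -d VPUX -use_remote_mem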
13 changes: 13 additions & 0 deletions inference-engine/samples/benchmark_app/benchmark_app.hpp
@@ -99,6 +99,11 @@ static const char load_config_message[] = "Optional. Path to XML/YAML/JSON file
static const char dump_config_message[] = "Optional. Path to XML/YAML/JSON file to dump IE parameters, which were set by application.";
#endif

#ifdef USE_REMOTE_MEM
// @brief message for the remote memory preallocation option
static const char use_remote_mem_message[] = "Optional. Preallocate remote memory on the device to execute infer requests.";
#endif

static const char shape_message[] = "Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or \"[1,3,224,224]\""
" in case of one input size.";

@@ -202,6 +207,11 @@ DEFINE_string(load_config, "", load_config_message);
DEFINE_string(dump_config, "", dump_config_message);
#endif

#ifdef USE_REMOTE_MEM
/// @brief Define flag for the remote memory preallocation option <br>
DEFINE_bool(use_remote_mem, false, use_remote_mem_message);
#endif

/// @brief Define flag for input shape <br>
DEFINE_string(shape, "", shape_message);

@@ -247,6 +257,9 @@ static void showUsage() {
std::cout << " -progress " << progress_message << std::endl;
std::cout << " -shape " << shape_message << std::endl;
std::cout << " -layout " << layout_message << std::endl;
#ifdef USE_REMOTE_MEM
std::cout << " -use_remote_mem " << use_remote_mem_message << std::endl;
#endif
std::cout << std::endl << " device-specific performance options:" << std::endl;
std::cout << " -nstreams \"<integer>\" " << infer_num_streams_message << std::endl;
std::cout << " -nthreads \"<integer>\" " << infer_num_threads_message << std::endl;
4 changes: 4 additions & 0 deletions inference-engine/samples/benchmark_app/infer_request_wrap.hpp
@@ -65,6 +65,10 @@ class InferReqWrap final {
return _request.GetBlob(name);
}

void setBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) {
_request.SetBlob(name, data);
}

double getExecutionTimeInMilliseconds() const {
auto execTime = std::chrono::duration_cast<ns>(_endTime - _startTime);
return static_cast<double>(execTime.count()) * 0.000001;
20 changes: 20 additions & 0 deletions inference-engine/samples/benchmark_app/inputs_filling.cpp
@@ -12,6 +12,7 @@
#include <samples/slog.hpp>

#include "inputs_filling.hpp"
#include "../../src/plugin_api/blob_factory.hpp"

using namespace InferenceEngine;

@@ -188,10 +189,19 @@ void fillBlobImInfo(Blob::Ptr& inputBlob,
}
}

#ifdef USE_REMOTE_MEM
void fillBlobs(RemoteContextHelper& remoteContextHelper,
const std::vector<std::string>& inputFiles,
const size_t& batchSize,
benchmark_app::InputsInfo& app_inputs_info,
std::vector<InferReqWrap::Ptr> requests,
bool preallocImage) {
#else
void fillBlobs(const std::vector<std::string>& inputFiles,
const size_t& batchSize,
benchmark_app::InputsInfo& app_inputs_info,
std::vector<InferReqWrap::Ptr> requests) {
#endif
std::vector<std::pair<size_t, size_t>> input_image_sizes;
for (auto& item : app_inputs_info) {
if (item.second.isImage()) {
@@ -270,6 +280,11 @@ void fillBlobs(const std::vector<std::string>& inputFiles,
if (!imageFiles.empty()) {
// Fill with Images
fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount);
#ifdef USE_REMOTE_MEM
if (preallocImage) {
remoteContextHelper.PreallocRemoteMem(requests.at(requestId), item.first, inputBlob);
}
#endif
continue;
}
} else {
@@ -333,6 +348,11 @@ void fillBlobs(const std::vector<std::string>& inputFiles,
} else {
THROW_IE_EXCEPTION << "Input precision is not supported for " << item.first;
}
#ifdef USE_REMOTE_MEM
if (preallocImage) {
remoteContextHelper.PreallocRemoteMem(requests.at(requestId), item.first, inputBlob);
}
#endif
}
}
}
10 changes: 10 additions & 0 deletions inference-engine/samples/benchmark_app/inputs_filling.hpp
@@ -12,7 +12,17 @@
#include "utils.hpp"
#include "infer_request_wrap.hpp"

#include "remotecontext_helper.hpp"

#ifdef USE_REMOTE_MEM
void fillBlobs(RemoteContextHelper& remoteContextHelper,
const std::vector<std::string>& inputFiles,
const size_t& batchSize,
benchmark_app::InputsInfo& app_inputs_info,
std::vector<InferReqWrap::Ptr> requests, bool preallocImage);
#else
void fillBlobs(const std::vector<std::string>& inputFiles,
const size_t& batchSize,
benchmark_app::InputsInfo& app_inputs_info,
std::vector<InferReqWrap::Ptr> requests);
#endif
42 changes: 41 additions & 1 deletion inference-engine/samples/benchmark_app/main.cpp
@@ -24,6 +24,7 @@
#include "statistics_report.hpp"
#include "inputs_filling.hpp"
#include "utils.hpp"
#include "remotecontext_helper.hpp"

using namespace InferenceEngine;

@@ -75,6 +76,11 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) {

throw std::logic_error(err);
}
#ifdef USE_REMOTE_MEM
if (FLAGS_use_remote_mem && FLAGS_d != "VPUX") {
throw std::logic_error("Incorrect device name. Using remote memory feature must set device name to VPUX.");
}
#endif
return true;
}

@@ -117,8 +123,13 @@ T getMedianValue(const std::vector<T> &vec) {
int main(int argc, char *argv[]) {
std::shared_ptr<StatisticsReport> statistics;
try {
Core ie;
ExecutableNetwork exeNetwork;

#ifdef USE_REMOTE_MEM
RemoteContextHelper remoteContextHelper;
#endif

// ----------------- 1. Parsing and validating input arguments -------------------------------------------------
next_step();

@@ -170,7 +181,6 @@ int main(int argc, char *argv[]) {
// ----------------- 2. Loading the Inference Engine -----------------------------------------------------------
next_step();

Core ie;
if (FLAGS_d.find("CPU") != std::string::npos && !FLAGS_l.empty()) {
// CPU (MKLDNN) extensions is loaded as a shared library and passed as a pointer to base extension
const auto extension_ptr = InferenceEngine::make_so_pointer<InferenceEngine::IExtension>(FLAGS_l);
@@ -185,6 +195,11 @@
config["GPU"] = {};
config["GPU"][CONFIG_KEY(CONFIG_FILE)] = FLAGS_c;
}
#ifdef USE_REMOTE_MEM
if (FLAGS_d.find("VPUX") != std::string::npos) {
remoteContextHelper.Init(ie);
}
#endif
if (config.count("GPU") && config.at("GPU").count(CONFIG_KEY(CONFIG_FILE))) {
auto ext = config.at("GPU").at(CONFIG_KEY(CONFIG_FILE));
ie.SetConfig({{ CONFIG_KEY(CONFIG_FILE), ext }}, "GPU");
@@ -395,7 +410,15 @@ int main(int argc, char *argv[]) {
// ----------------- 7. Loading the model to the device --------------------------------------------------------
next_step();
startTime = Time::now();
#ifdef USE_REMOTE_MEM
if (!FLAGS_use_remote_mem) {
exeNetwork = ie.LoadNetwork(cnnNetwork, device_name);
} else {
exeNetwork = ie.LoadNetwork(cnnNetwork, remoteContextHelper.getRemoteContext());
}
#else
exeNetwork = ie.LoadNetwork(cnnNetwork, device_name);
#endif
duration_ms = double_to_string(get_total_ms_time(startTime));
slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
if (statistics)
@@ -413,7 +436,20 @@
// ----------------- 7. Loading the model to the device --------------------------------------------------------
next_step();
auto startTime = Time::now();
#ifdef USE_REMOTE_MEM
if (!FLAGS_use_remote_mem) {
exeNetwork = ie.ImportNetwork(FLAGS_m, device_name, {});
} else {
std::filebuf blobFile;
if (!blobFile.open(FLAGS_m, std::ios::in | std::ios::binary)) {
THROW_IE_EXCEPTION << "Could not open file: " << FLAGS_m;
}
std::istream graphBlob(&blobFile);
exeNetwork = ie.ImportNetwork(graphBlob, remoteContextHelper.getRemoteContext());
}
#else
exeNetwork = ie.ImportNetwork(FLAGS_m, device_name, {});
#endif
auto duration_ms = double_to_string(get_total_ms_time(startTime));
slog::info << "Import network took " << duration_ms << " ms" << slog::endl;
if (statistics)
@@ -500,7 +536,11 @@ int main(int argc, char *argv[]) {
next_step();

InferRequestsQueue inferRequestsQueue(exeNetwork, nireq);
#ifdef USE_REMOTE_MEM
fillBlobs(remoteContextHelper, inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests, FLAGS_use_remote_mem);
#else
fillBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests);
#endif

// ----------------- 10. Measuring performance ------------------------------------------------------------------
size_t progressCnt = 0;
111 changes: 111 additions & 0 deletions inference-engine/samples/benchmark_app/remotecontext_helper.cpp
@@ -0,0 +1,111 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#ifdef USE_REMOTE_MEM
#include <string>
#include <algorithm>
#include <utility>
#include <vector>
#include <map>
#include <regex>

#include <inference_engine.hpp>

#include <samples/common.hpp>
#include <samples/slog.hpp>

#include "utils.hpp"
#include "remotecontext_helper.hpp"
#include "WorkloadContext.h"
#include "RemoteMemory.h"
#include "hddl2/hddl2_params.hpp"
#include "ie_compound_blob.h"

using namespace InferenceEngine;

class RemoteContextHelper::Impl {
WorkloadID _workloadId = -1;
HddlUnite::WorkloadContext::Ptr _context;
RemoteContext::Ptr _contextPtr;
bool _init = false;

public:
void Init(InferenceEngine::Core& ie) {
_context = HddlUnite::createWorkloadContext();
_context->setContext(_workloadId);
auto ret = registerWorkloadContext(_context);
if (ret != HddlStatusCode::HDDL_OK) {
THROW_IE_EXCEPTION << "registerWorkloadContext failed with " << ret;
}

// build the parameter map and create a remote context from the workload context ID
ParamMap paramMap = { {HDDL2_PARAM_KEY(WORKLOAD_CONTEXT_ID), _workloadId} };
_contextPtr = ie.CreateContext("VPUX", paramMap);
_init = true;
}

HddlUnite::RemoteMemory::Ptr allocateRemoteMemory(const void* data, const size_t& dataSize) {
auto remoteFrame = std::make_shared<HddlUnite::RemoteMemory>(*_context,
HddlUnite::RemoteMemoryDesc(dataSize, 1, dataSize, 1));

if (remoteFrame == nullptr) {
THROW_IE_EXCEPTION << "Failed to allocate remote memory.";
}

if (remoteFrame->syncToDevice(data, dataSize) != HDDL_OK) {
THROW_IE_EXCEPTION << "Failed to sync memory to device.";
}
return remoteFrame;
}

void PreallocRemoteMem(InferReqWrap::Ptr& request,
const std::string& inputBlobName,
const Blob::Ptr& inputBlob) {
if (!_init)
THROW_IE_EXCEPTION << "RemoteContextHelper is not initialized.";
MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
const TensorDesc& inputTensor = minput->getTensorDesc();
// locked memory holder should be alive all time while access to its buffer happens
auto minputHolder = minput->rmap();
auto inputBlobData = minputHolder.as<uint8_t*>();

// 1. Allocate memory on the device with HddlUnite
auto remoteMemory = allocateRemoteMemory(inputBlobData, minput->byteSize());

// 2. Create a remote blob that wraps the already-allocated remote memory
ParamMap blobParamMap = { {HDDL2_PARAM_KEY(REMOTE_MEMORY), remoteMemory} };
RemoteBlob::Ptr remoteBlobPtr = _contextPtr->CreateBlob(inputTensor, blobParamMap);
if (remoteBlobPtr == nullptr) {
THROW_IE_EXCEPTION << "CreateBlob failed.";
}

// 3. Set the remote blob on the infer request
request->setBlob(inputBlobName, remoteBlobPtr);
}

RemoteContext::Ptr getRemoteContext() {
if (!_init)
THROW_IE_EXCEPTION << "RemoteContextHelper is not initialized.";
return _contextPtr;
}
};

RemoteContextHelper::RemoteContextHelper() : _impl(new RemoteContextHelper::Impl()) {
}

RemoteContextHelper::~RemoteContextHelper() {
}

void RemoteContextHelper::Init(InferenceEngine::Core& ie) {
_impl->Init(ie);
}

void RemoteContextHelper::PreallocRemoteMem(InferReqWrap::Ptr& request, const std::string& inputBlobName, const Blob::Ptr& inputBlob) {
_impl->PreallocRemoteMem(request, inputBlobName, inputBlob);
}

InferenceEngine::RemoteContext::Ptr RemoteContextHelper::getRemoteContext() {
return _impl->getRemoteContext();
}
#endif
26 changes: 26 additions & 0 deletions inference-engine/samples/benchmark_app/remotecontext_helper.hpp
@@ -0,0 +1,26 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <string>
#include <vector>
#include <map>
#include <memory>
#include <inference_engine.hpp>
#include "infer_request_wrap.hpp"

class RemoteContextHelper {
class Impl;
std::unique_ptr<Impl> _impl;
public:
RemoteContextHelper();
~RemoteContextHelper();

void Init(InferenceEngine::Core& ie);
void PreallocRemoteMem(InferReqWrap::Ptr& request,
const std::string& inputBlobName,
const InferenceEngine::Blob::Ptr& inputBlob);
InferenceEngine::RemoteContext::Ptr getRemoteContext();
};
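A minimal sketch of how these pieces fit together, mirroring the flow in main.cpp above (assumes USE_REMOTE_MEM is defined, the HddlUnite stack is available, and a precompiled model.blob exists at a hypothetical path):

    #include <fstream>
    #include <inference_engine.hpp>
    #include "remotecontext_helper.hpp"

    int runWithRemoteMem() {
        InferenceEngine::Core ie;

        // Create and register a HddlUnite workload context, then build
        // the VPUX remote context from its ID.
        RemoteContextHelper helper;
        helper.Init(ie);

        // Import a precompiled blob through the remote context
        // instead of a plain device name.
        std::ifstream blobFile("model.blob", std::ios::binary);
        InferenceEngine::ExecutableNetwork exeNetwork =
            ie.ImportNetwork(blobFile, helper.getRemoteContext());

        // Per request and per input, the sample then preallocates
        // device-side memory and swaps in the remote blob:
        //   helper.PreallocRemoteMem(requestWrap, inputName, inputBlob);
        return 0;
    }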
