Commit aa7f7a4

updat_to_latest
1 parent 80c6074 commit aa7f7a4

2 files changed: +149 −49 lines changed


model_api/cpp/adapters/include/adapters/ovms_adapter.h

Lines changed: 22 additions & 5 deletions
@@ -26,13 +26,15 @@
 #include "adapters/inference_adapter.h"
 #include <openvino/openvino.hpp>
 
+#include "ovms.h"  // NOLINT
+
 // here we need to decide if we have several calculators (1 for OVMS repository, 1-N inside mediapipe)
 // for the one inside OVMS repo it makes sense to reuse code from ovms lib
 
 class OVMS_Server_;
 typedef struct OVMS_Server_ OVMS_Server;
-namespace mediapipe {
-namespace ovms {
+
+namespace mediapipe::ovms {
 
 using InferenceOutput = std::map<std::string, ov::Tensor>;
 using InferenceInput = std::map<std::string, ov::Tensor>;
@@ -47,12 +49,25 @@ class OVMSInferenceAdapter : public ::InferenceAdapter {
     std::vector<std::string> inputNames;
     std::vector<std::string> outputNames;
     shapes_min_max_t inShapesMinMaxes;
+    shapes_min_max_t outShapesMinMaxes;
+    std::unordered_map<std::string, ov::element::Type_t> inputDatatypes;
+    std::unordered_map<std::string, ov::element::Type_t> outputDatatypes;
     ov::AnyMap modelConfig;
 
 public:
-    OVMSInferenceAdapter(const std::string& servableName, uint32_t servableVersion = 0, OVMS_Server* server = nullptr);
+    // TODO Windows: Fix definition in header - does not compile in cpp.
+    OVMSInferenceAdapter(const std::string& servableName, uint32_t servableVersion = 0, OVMS_Server* server = nullptr) :
+        servableName(servableName),
+        servableVersion(servableVersion) {
+        if (nullptr != server) {
+            this->cserver = server;
+        } else {
+            OVMS_ServerNew(&this->cserver);
+        }
+    }
     virtual ~OVMSInferenceAdapter();
     InferenceOutput infer(const InferenceInput& input) override;
+    void infer(const InferenceInput& input, InferenceOutput& output) override;
     void loadModel(const std::shared_ptr<const ov::Model>& model, ov::Core& core,
         const std::string& device, const ov::AnyMap& compilationConfig, size_t max_num_requests = 1) override;
     void inferAsync(const InferenceInput& input, const CallbackData callback_args) override;
@@ -62,9 +77,11 @@ class OVMSInferenceAdapter : public ::InferenceAdapter {
     void awaitAny();
     size_t getNumAsyncExecutors() const;
     ov::PartialShape getInputShape(const std::string& inputName) const override;
+    ov::PartialShape getOutputShape(const std::string& outputName) const override;
+    ov::element::Type_t getInputDatatype(const std::string& inputName) const override;
+    ov::element::Type_t getOutputDatatype(const std::string& outputName) const override;
     std::vector<std::string> getInputNames() const override;
     std::vector<std::string> getOutputNames() const override;
     const ov::AnyMap& getModelConfig() const override;
 };
-} // namespace ovms
-} // namespace mediapipe
+} // namespace mediapipe::ovms
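
Note (not part of this commit): the reworked header adds output-side introspection (getOutputShape, getOutputDatatype) and an infer overload that fills caller-provided output tensors instead of allocating new ones. The sketch below shows how a caller might use the extended interface; it assumes the adapter's backing OVMS server is already serving the model, that the model has a single input and a single output, and that the shapes are static. The include path and helper name are illustrative only.

#include <string>

#include <openvino/openvino.hpp>

#include "adapters/ovms_adapter.h"  // assumed include path for the header above

// Sketch: run one inference with a pre-allocated output tensor, so OVMS writes
// into the caller-owned buffer registered via the new infer(input, output) overload.
ov::Tensor inferWithPreallocatedOutput(mediapipe::ovms::OVMSInferenceAdapter& adapter,
                                       const ov::Tensor& inputTensor) {
    const std::string inputName = adapter.getInputNames().front();
    const std::string outputName = adapter.getOutputNames().front();

    // getOutputShape() returns an ov::PartialShape built from the min/max borders
    // reported by OVMS; get_shape() only succeeds when it is fully static.
    ov::Tensor outputTensor(adapter.getOutputDatatype(outputName),
                            adapter.getOutputShape(outputName).get_shape());

    mediapipe::ovms::InferenceInput input{{inputName, inputTensor}};
    mediapipe::ovms::InferenceOutput output{{outputName, outputTensor}};
    adapter.infer(input, output);  // fills the pre-registered output buffer
    return output.at(outputName);
}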

model_api/cpp/adapters/src/ovms_adapter.cpp

Lines changed: 127 additions & 44 deletions
@@ -32,19 +32,15 @@
 #pragma GCC diagnostic pop
 // here we need to decide if we have several calculators (1 for OVMS repository, 1-N inside mediapipe)
 // for the one inside OVMS repo it makes sense to reuse code from ovms lib
-namespace mediapipe {
-
+namespace mediapipe::ovms {
+
 using std::endl;
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+using InferenceOutput = std::map<std::string, ov::Tensor>;
+using InferenceInput = std::map<std::string, ov::Tensor>;
 
-
-#define THROW_IF_CIRCULAR_ERR(C_API_CALL) \
-    { \
-        auto* fatalErr = C_API_CALL; \
-        if (fatalErr != nullptr) { \
-            std::runtime_error exc("Getting status details circular error"); \
-            throw exc; \
-        } \
-    }
+#define THROW_IF_CIRCULAR_ERR(C_API_CALL) { auto* fatalErr = C_API_CALL;if (fatalErr != nullptr) {std::runtime_error exc("Getting status details circular error");throw exc; } }
 
 #define ASSERT_CAPI_STATUS_NULL(C_API_CALL) \
     { \
@@ -59,58 +55,115 @@ using std::endl;
             throw exc; \
         } \
     }
+
 #define CREATE_GUARD(GUARD_NAME, CAPI_TYPE, CAPI_PTR) \
     std::unique_ptr<CAPI_TYPE, decltype(&(CAPI_TYPE##Delete))> GUARD_NAME(CAPI_PTR, &(CAPI_TYPE##Delete));
 
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-function"
-using InferenceOutput = std::map<std::string, ov::Tensor>;
-using InferenceInput = std::map<std::string, ov::Tensor>;
-
-namespace ovms {
 static OVMS_DataType OVPrecision2CAPI(ov::element::Type_t datatype);
 static ov::element::Type_t CAPI2OVPrecision(OVMS_DataType datatype);
 static ov::Tensor makeOvTensor(OVMS_DataType datatype, const int64_t* shape, size_t dimCount, const void* voutputData, size_t bytesize);
 
-OVMSInferenceAdapter::OVMSInferenceAdapter(const std::string& servableName, uint32_t servableVersion, OVMS_Server* cserver) :
-    servableName(servableName),
-    servableVersion(servableVersion) {
-    if (nullptr != cserver) {
-        this->cserver = cserver;
-    } else {
-        OVMS_ServerNew(&this->cserver);
-    }
+OVMSInferenceAdapter::~OVMSInferenceAdapter() {
+    // LOG(INFO) << "OVMSAdapter destr";
 }
 
-OVMSInferenceAdapter::~OVMSInferenceAdapter() {
-    //LOG(INFO) << "OVMSAdapter destr";
+inline std::vector<int64_t> getShapeAcceptableByCAPI(const ov::Shape& shape) {
+    if (std::any_of(shape.begin(), shape.end(), [](size_t dim) {
+            return dim > std::numeric_limits<int64_t>::max();})) {
+        throw std::runtime_error("Cannot use C-API with dimension size greater than int64_t max value");
+    }
+    return std::vector<int64_t>{shape.begin(), shape.end()};
 }
 
-InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
+void OVMSInferenceAdapter::infer(const InferenceInput& input, InferenceOutput& output) {
     /////////////////////
     // PREPARE REQUEST
     /////////////////////
     OVMS_InferenceRequest* request{nullptr};
     ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestNew(&request, cserver, servableName.c_str(), servableVersion));
     CREATE_GUARD(requestGuard, OVMS_InferenceRequest, request);
-
-    InferenceOutput output;
+
     OVMS_Status* status{nullptr};
+    std::vector<std::string> outputsSet;
     // PREPARE EACH INPUT
     // extract single tensor
     for (const auto& [name, input_tensor] : input) {
-        const char* realInputName = name.c_str();
-        const auto& ovinputShape = input_tensor.get_shape();
-        if (std::any_of(ovinputShape.begin(), ovinputShape.end(), [](size_t dim) {
-                return dim > std::numeric_limits<int64_t>::max();})) {
-            throw std::runtime_error("Cannot use C-API with dimension size greater than int64_t max value");
+        const char* realName = name.c_str();
+        const auto& ovShape = input_tensor.get_shape();
+        std::vector<int64_t> capiShape = getShapeAcceptableByCAPI(ovShape);
+        OVMS_DataType inputDataType = OVPrecision2CAPI(input_tensor.get_element_type());
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, realName, inputDataType, capiShape.data(), capiShape.size()));
+        const uint32_t NOT_USED_NUM = 0;
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestInputSetData(request,
+            realName,
+            reinterpret_cast<void*>(input_tensor.data()),
+            input_tensor.get_byte_size(),
+            OVMS_BUFFERTYPE_CPU,
+            NOT_USED_NUM));
+    }
+    for (const auto& [name, output_tensor] : output) {
+        outputsSet.emplace_back(name);
+        const char* realName = name.c_str();
+        const auto& ovShape = output_tensor.get_shape();
+        std::vector<int64_t> capiShape = getShapeAcceptableByCAPI(ovShape);
+        OVMS_DataType inputDataType = OVPrecision2CAPI(output_tensor.get_element_type());
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddOutput(request, realName, inputDataType, capiShape.data(), capiShape.size()));
+        const uint32_t NOT_USED_NUM = 0;
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestOutputSetData(request,
+            realName,
+            reinterpret_cast<void*>(output_tensor.data()),
+            output_tensor.get_byte_size(),
+            OVMS_BUFFERTYPE_CPU,
+            NOT_USED_NUM));
+
+    }
+
+    //////////////////
+    // INFERENCE
+    //////////////////
+    OVMS_InferenceResponse* response = nullptr;
+    ASSERT_CAPI_STATUS_NULL(OVMS_Inference(cserver, request, &response));
+    CREATE_GUARD(responseGuard, OVMS_InferenceResponse, response);
+    uint32_t outputCount = 42;
+    ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseOutputCount(response, &outputCount));
+    uint32_t parameterCount = 42;
+    ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseParameterCount(response, &parameterCount));
+    const void* voutputData;
+    size_t bytesize = 42;
+    OVMS_DataType datatype = (OVMS_DataType)199;
+    const int64_t* shape{nullptr};
+    size_t dimCount = 42;
+    OVMS_BufferType bufferType = (OVMS_BufferType)199;
+    uint32_t deviceId = 42;
+    const char* outputName{nullptr};
+    for (size_t i = 0; i < outputCount; ++i) {
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseOutput(response, i, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId));
+        if (std::find(outputsSet.begin(), outputsSet.end(), outputName) == outputsSet.end()) {
+            output.emplace(outputName, std::move(makeOvTensor(datatype, shape, dimCount, voutputData, bytesize)));
         }
-        std::vector<int64_t> inputShape{ovinputShape.begin(), ovinputShape.end()};
+    }
+
+}
+InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
+    /////////////////////
+    // PREPARE REQUEST
+    /////////////////////
+    OVMS_InferenceRequest* request{nullptr};
+    ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestNew(&request, cserver, servableName.c_str(), servableVersion));
+    CREATE_GUARD(requestGuard, OVMS_InferenceRequest, request);
+
+    InferenceOutput output;
+    OVMS_Status* status{nullptr};
+    // PREPARE EACH INPUT
+    for (const auto& [name, input_tensor] : input) {
+        const char* realName = name.c_str();
+        const auto& ovShape = input_tensor.get_shape();
+        std::vector<int64_t> capiShape = getShapeAcceptableByCAPI(ovShape);
         OVMS_DataType inputDataType = OVPrecision2CAPI(input_tensor.get_element_type());
-        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, realInputName, inputDataType, inputShape.data(), inputShape.size()));
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, realName, inputDataType, capiShape.data(), capiShape.size()));
         const uint32_t NOT_USED_NUM = 0;
         ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestInputSetData(request,
-            realInputName,
+            realName,
             reinterpret_cast<void*>(input_tensor.data()),
             input_tensor.get_byte_size(),
             OVMS_BUFFERTYPE_CPU,
@@ -130,7 +183,7 @@ InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
         std::stringstream ss;
         ss << "Inference in OVMSAdapter failed: ";
         ss << msg << " code: " << code;
-        //LOG(INFO) << ss.str();
+        // LOG(INFO) << ss.str();
         OVMS_StatusDelete(status);
         return output;
     }
@@ -149,7 +202,7 @@ InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
     const char* outputName{nullptr};
     for (size_t i = 0; i < outputCount; ++i) {
         ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseOutput(response, i, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId));
-        output[outputName] = makeOvTensor(datatype, shape, dimCount, voutputData, bytesize);
+        output.emplace(outputName, std::move(makeOvTensor(datatype, shape, dimCount, voutputData, bytesize)));
     }
 
     return output;
@@ -181,10 +234,18 @@ void OVMSInferenceAdapter::loadModel(const std::shared_ptr<const ov::Model>& mod
             inputMinMax.second.emplace_back(shapeMax[i]);
         }
         this->inShapesMinMaxes.insert({tensorName, std::move(inputMinMax)});
+        this->inputDatatypes.insert({tensorName, CAPI2OVPrecision(datatype)});
     }
     for (id = 0; id < outputCount; ++id) {
         ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataOutput(servableMetadata, id, &tensorName, &datatype, &dimCount, &shapeMin, &shapeMax));
         outputNames.emplace_back(tensorName);
+        shape_min_max_t outputMinMax;
+        for (size_t i = 0; i < dimCount; ++i) {
+            outputMinMax.first.emplace_back(shapeMin[i]);
+            outputMinMax.second.emplace_back(shapeMax[i]);
+        }
+        this->outShapesMinMaxes.insert({tensorName, std::move(outputMinMax)});
+        this->outputDatatypes.insert({tensorName, CAPI2OVPrecision(datatype)});
     }
     const ov::AnyMap* servableMetadataRtInfo;
     ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataInfo(servableMetadata, reinterpret_cast<const void**>(&servableMetadataRtInfo)));
@@ -195,15 +256,23 @@ void OVMSInferenceAdapter::loadModel(const std::shared_ptr<const ov::Model>& mod
             this->modelConfig = (*servableMetadataRtInfo).at("model_info").as<ov::AnyMap>();
         }
     } catch (const std::exception& e) {
-        //LOG(INFO) << "Exception occurred while accessing model_info: " << e.what();
+        // LOG(INFO) << "Exception occurred while accessing model_info: " << e.what();
         this->modelConfig = ov::AnyMap{};
     }
 }
 
+ov::element::Type_t OVMSInferenceAdapter::getInputDatatype(const std::string& inputName) const {
+    return inputDatatypes.at(inputName);
+}
+
+ov::element::Type_t OVMSInferenceAdapter::getOutputDatatype(const std::string& outputName) const {
+    return outputDatatypes.at(outputName);
+}
+
 ov::PartialShape OVMSInferenceAdapter::getInputShape(const std::string& inputName) const {
     auto it = inShapesMinMaxes.find(inputName);
     if (it == inShapesMinMaxes.end()) {
-        //LOG(INFO) << "Could not find input:" << inputName;
+        // LOG(INFO) << "Could not find input:" << inputName;
         throw std::runtime_error(std::string("Adapter could not find input:") + inputName);
     }
 
@@ -215,6 +284,21 @@ ov::PartialShape OVMSInferenceAdapter::getInputShape(const std::string& inputNam
     }
     return ovShape;
 }
+ov::PartialShape OVMSInferenceAdapter::getOutputShape(const std::string& outputName) const {
+    auto it = outShapesMinMaxes.find(outputName);
+    if (it == outShapesMinMaxes.end()) {
+        // LOG(INFO) << "Could not find output:" << outputName;
+        throw std::runtime_error(std::string("Adapter could not find output:") + outputName);
+    }
+
+    ov::PartialShape ovShape;
+    const auto& [minBorder, maxBorder] = it->second;
+    ovShape.reserve(minBorder.size());
+    for (size_t i = 0; i < minBorder.size(); ++i) {
+        ovShape.emplace_back(ov::Dimension{minBorder[i], maxBorder[i]});
+    }
+    return ovShape;
+}
 
 std::vector<std::string> OVMSInferenceAdapter::getInputNames() const { return inputNames; }
 
@@ -328,5 +412,4 @@ static ov::Tensor makeOvTensor(OVMS_DataType datatype, const int64_t* shape, siz
 }
 
 #pragma GCC diagnostic pop
-} // namespace ovms
-} // namespace mediapipe
+} // namespace mediapipe::ovms
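
Note (not part of this commit): loadModel now records output min/max shape borders and element types, and getOutputShape exposes them as an interval-bounded ov::PartialShape (one ov::Dimension{min, max} per axis). The helper below is a hedged sketch of how a caller might collapse such a partial shape into a concrete ov::Shape for tensor allocation; the fallback policy (lower bound, then 1) is an assumption of this example, not adapter behavior, and the helper name is illustrative only.

#include <openvino/openvino.hpp>

// Resolve an interval-bounded shape (as returned by getOutputShape) into a
// static ov::Shape usable for allocating a tensor.
inline ov::Shape resolveForAllocation(const ov::PartialShape& pshape) {
    ov::Shape result;
    for (const auto& dim : pshape) {
        if (dim.is_static()) {
            result.push_back(dim.get_length());      // exact size known
        } else if (dim.get_min_length() > 0) {
            result.push_back(dim.get_min_length());  // use the server-reported lower bound
        } else {
            result.push_back(1);                     // fully dynamic axis: assume batch-like, pick 1
        }
    }
    return result;
}

For example, output borders of {1,1} {3,3} {224,224} {224,224} resolve to an ov::Shape of {1, 3, 224, 224}, while a dynamic batch axis with borders {1, 512} resolves to 1 under this policy.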
