 #pragma GCC diagnostic pop
 // here we need to decide if we have several calculators (1 for OVMS repository, 1-N inside mediapipe)
 // for the one inside OVMS repo it makes sense to reuse code from ovms lib
-namespace mediapipe {
-
+namespace mediapipe::ovms {
+
 using std::endl;
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+using InferenceOutput = std::map<std::string, ov::Tensor>;
+using InferenceInput = std::map<std::string, ov::Tensor>;
 
-
-#define THROW_IF_CIRCULAR_ERR(C_API_CALL) \
-    { \
-        auto* fatalErr = C_API_CALL; \
-        if (fatalErr != nullptr) { \
-            std::runtime_error exc("Getting status details circular error"); \
-            throw exc; \
-        } \
-    }
+#define THROW_IF_CIRCULAR_ERR(C_API_CALL) { auto* fatalErr = C_API_CALL; if (fatalErr != nullptr) { std::runtime_error exc("Getting status details circular error"); throw exc; } }
 
 #define ASSERT_CAPI_STATUS_NULL(C_API_CALL) \
     { \
@@ -59,58 +55,115 @@ using std::endl;
             throw exc; \
         } \
     }
+
 #define CREATE_GUARD(GUARD_NAME, CAPI_TYPE, CAPI_PTR) \
     std::unique_ptr<CAPI_TYPE, decltype(&(CAPI_TYPE##Delete))> GUARD_NAME(CAPI_PTR, &(CAPI_TYPE##Delete));
 
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-function"
-using InferenceOutput = std::map<std::string, ov::Tensor>;
-using InferenceInput = std::map<std::string, ov::Tensor>;
-
-namespace ovms {
 static OVMS_DataType OVPrecision2CAPI(ov::element::Type_t datatype);
 static ov::element::Type_t CAPI2OVPrecision(OVMS_DataType datatype);
 static ov::Tensor makeOvTensor(OVMS_DataType datatype, const int64_t* shape, size_t dimCount, const void* voutputData, size_t bytesize);
 
-OVMSInferenceAdapter::OVMSInferenceAdapter(const std::string& servableName, uint32_t servableVersion, OVMS_Server* cserver) :
-    servableName(servableName),
-    servableVersion(servableVersion) {
-    if (nullptr != cserver) {
-        this->cserver = cserver;
-    } else {
-        OVMS_ServerNew(&this->cserver);
-    }
+OVMSInferenceAdapter::~OVMSInferenceAdapter() {
+    // LOG(INFO) << "OVMSAdapter destr";
 }
 
-OVMSInferenceAdapter::~OVMSInferenceAdapter() {
-    // LOG(INFO) << "OVMSAdapter destr";
+inline std::vector<int64_t> getShapeAcceptableByCAPI(const ov::Shape& shape) {
+    if (std::any_of(shape.begin(), shape.end(), [](size_t dim) {
+            return dim > std::numeric_limits<int64_t>::max();})) {
+        throw std::runtime_error("Cannot use C-API with dimension size greater than int64_t max value");
+    }
+    return std::vector<int64_t>{shape.begin(), shape.end()};
 }
 
-InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
+void OVMSInferenceAdapter::infer(const InferenceInput& input, InferenceOutput& output) {
     /////////////////////
     // PREPARE REQUEST
     /////////////////////
     OVMS_InferenceRequest* request{nullptr};
     ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestNew(&request, cserver, servableName.c_str(), servableVersion));
     CREATE_GUARD(requestGuard, OVMS_InferenceRequest, request);
-
-    InferenceOutput output;
+
     OVMS_Status* status{nullptr};
+    std::vector<std::string> outputsSet;
     // PREPARE EACH INPUT
     // extract single tensor
     for (const auto& [name, input_tensor] : input) {
-        const char* realInputName = name.c_str();
-        const auto& ovinputShape = input_tensor.get_shape();
-        if (std::any_of(ovinputShape.begin(), ovinputShape.end(), [](size_t dim) {
-                return dim > std::numeric_limits<int64_t>::max();})) {
-            throw std::runtime_error("Cannot use C-API with dimension size greater than int64_t max value");
+        const char* realName = name.c_str();
+        const auto& ovShape = input_tensor.get_shape();
+        std::vector<int64_t> capiShape = getShapeAcceptableByCAPI(ovShape);
+        OVMS_DataType inputDataType = OVPrecision2CAPI(input_tensor.get_element_type());
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, realName, inputDataType, capiShape.data(), capiShape.size()));
+        const uint32_t NOT_USED_NUM = 0;
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestInputSetData(request,
+            realName,
+            reinterpret_cast<void*>(input_tensor.data()),
+            input_tensor.get_byte_size(),
+            OVMS_BUFFERTYPE_CPU,
+            NOT_USED_NUM));
+    }
+    for (const auto& [name, output_tensor] : output) {
+        outputsSet.emplace_back(name);
+        const char* realName = name.c_str();
+        const auto& ovShape = output_tensor.get_shape();
+        std::vector<int64_t> capiShape = getShapeAcceptableByCAPI(ovShape);
+        OVMS_DataType inputDataType = OVPrecision2CAPI(output_tensor.get_element_type());
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddOutput(request, realName, inputDataType, capiShape.data(), capiShape.size()));
+        const uint32_t NOT_USED_NUM = 0;
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestOutputSetData(request,
+            realName,
+            reinterpret_cast<void*>(output_tensor.data()),
+            output_tensor.get_byte_size(),
+            OVMS_BUFFERTYPE_CPU,
+            NOT_USED_NUM));
+
+    }
+
+    //////////////////
+    // INFERENCE
+    //////////////////
+    OVMS_InferenceResponse* response = nullptr;
+    ASSERT_CAPI_STATUS_NULL(OVMS_Inference(cserver, request, &response));
+    CREATE_GUARD(responseGuard, OVMS_InferenceResponse, response);
+    uint32_t outputCount = 42;
+    ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseOutputCount(response, &outputCount));
+    uint32_t parameterCount = 42;
+    ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseParameterCount(response, &parameterCount));
+    const void* voutputData;
+    size_t bytesize = 42;
+    OVMS_DataType datatype = (OVMS_DataType)199;
+    const int64_t* shape{nullptr};
+    size_t dimCount = 42;
+    OVMS_BufferType bufferType = (OVMS_BufferType)199;
+    uint32_t deviceId = 42;
+    const char* outputName{nullptr};
+    for (size_t i = 0; i < outputCount; ++i) {
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseOutput(response, i, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId));
+        if (std::find(outputsSet.begin(), outputsSet.end(), outputName) == outputsSet.end()) {
+            output.emplace(outputName, std::move(makeOvTensor(datatype, shape, dimCount, voutputData, bytesize)));
         }
-        std::vector<int64_t> inputShape{ovinputShape.begin(), ovinputShape.end()};
+    }
+
+}
+InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
+    /////////////////////
+    // PREPARE REQUEST
+    /////////////////////
+    OVMS_InferenceRequest* request{nullptr};
+    ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestNew(&request, cserver, servableName.c_str(), servableVersion));
+    CREATE_GUARD(requestGuard, OVMS_InferenceRequest, request);
+
+    InferenceOutput output;
+    OVMS_Status* status{nullptr};
+    // PREPARE EACH INPUT
+    for (const auto& [name, input_tensor] : input) {
+        const char* realName = name.c_str();
+        const auto& ovShape = input_tensor.get_shape();
+        std::vector<int64_t> capiShape = getShapeAcceptableByCAPI(ovShape);
         OVMS_DataType inputDataType = OVPrecision2CAPI(input_tensor.get_element_type());
-        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, realInputName, inputDataType, inputShape.data(), inputShape.size()));
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, realName, inputDataType, capiShape.data(), capiShape.size()));
         const uint32_t NOT_USED_NUM = 0;
         ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestInputSetData(request,
-            realInputName,
+            realName,
             reinterpret_cast<void*>(input_tensor.data()),
             input_tensor.get_byte_size(),
             OVMS_BUFFERTYPE_CPU,
@@ -130,7 +183,7 @@ InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
         std::stringstream ss;
         ss << "Inference in OVMSAdapter failed: ";
         ss << msg << " code: " << code;
-        // LOG(INFO) << ss.str();
+        // LOG(INFO) << ss.str();
         OVMS_StatusDelete(status);
         return output;
     }
@@ -149,7 +202,7 @@ InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
     const char* outputName{nullptr};
     for (size_t i = 0; i < outputCount; ++i) {
         ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseOutput(response, i, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId));
-        output[outputName] = makeOvTensor(datatype, shape, dimCount, voutputData, bytesize);
+        output.emplace(outputName, std::move(makeOvTensor(datatype, shape, dimCount, voutputData, bytesize)));
     }
 
     return output;
@@ -181,10 +234,18 @@ void OVMSInferenceAdapter::loadModel(const std::shared_ptr<const ov::Model>& mod
             inputMinMax.second.emplace_back(shapeMax[i]);
         }
         this->inShapesMinMaxes.insert({tensorName, std::move(inputMinMax)});
+        this->inputDatatypes.insert({tensorName, CAPI2OVPrecision(datatype)});
     }
     for (id = 0; id < outputCount; ++id) {
         ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataOutput(servableMetadata, id, &tensorName, &datatype, &dimCount, &shapeMin, &shapeMax));
         outputNames.emplace_back(tensorName);
+        shape_min_max_t outputMinMax;
+        for (size_t i = 0; i < dimCount; ++i) {
+            outputMinMax.first.emplace_back(shapeMin[i]);
+            outputMinMax.second.emplace_back(shapeMax[i]);
+        }
+        this->outShapesMinMaxes.insert({tensorName, std::move(outputMinMax)});
+        this->outputDatatypes.insert({tensorName, CAPI2OVPrecision(datatype)});
     }
     const ov::AnyMap* servableMetadataRtInfo;
     ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataInfo(servableMetadata, reinterpret_cast<const void**>(&servableMetadataRtInfo)));
@@ -195,15 +256,23 @@ void OVMSInferenceAdapter::loadModel(const std::shared_ptr<const ov::Model>& mod
             this->modelConfig = (*servableMetadataRtInfo).at("model_info").as<ov::AnyMap>();
         }
     } catch (const std::exception& e) {
-        // LOG(INFO) << "Exception occurred while accessing model_info: " << e.what();
+        // LOG(INFO) << "Exception occurred while accessing model_info: " << e.what();
         this->modelConfig = ov::AnyMap{};
     }
 }
 
+ov::element::Type_t OVMSInferenceAdapter::getInputDatatype(const std::string& inputName) const {
+    return inputDatatypes.at(inputName);
+}
+
+ov::element::Type_t OVMSInferenceAdapter::getOutputDatatype(const std::string& outputName) const {
+    return outputDatatypes.at(outputName);
+}
+
 ov::PartialShape OVMSInferenceAdapter::getInputShape(const std::string& inputName) const {
     auto it = inShapesMinMaxes.find(inputName);
     if (it == inShapesMinMaxes.end()) {
-        // LOG(INFO) << "Could not find input:" << inputName;
+        // LOG(INFO) << "Could not find input:" << inputName;
         throw std::runtime_error(std::string("Adapter could not find input:") + inputName);
     }
 
@@ -215,6 +284,21 @@ ov::PartialShape OVMSInferenceAdapter::getInputShape(const std::string& inputNam
     }
     return ovShape;
 }
+ov::PartialShape OVMSInferenceAdapter::getOutputShape(const std::string& outputName) const {
+    auto it = outShapesMinMaxes.find(outputName);
+    if (it == outShapesMinMaxes.end()) {
+        // LOG(INFO) << "Could not find output:" << outputName;
+        throw std::runtime_error(std::string("Adapter could not find output:") + outputName);
+    }
+
+    ov::PartialShape ovShape;
+    const auto& [minBorder, maxBorder] = it->second;
+    ovShape.reserve(minBorder.size());
+    for (size_t i = 0; i < minBorder.size(); ++i) {
+        ovShape.emplace_back(ov::Dimension{minBorder[i], maxBorder[i]});
+    }
+    return ovShape;
+}
 
 std::vector<std::string> OVMSInferenceAdapter::getInputNames() const { return inputNames; }
 
@@ -328,5 +412,4 @@ static ov::Tensor makeOvTensor(OVMS_DataType datatype, const int64_t* shape, siz
 }
 
 #pragma GCC diagnostic pop
-}  // namespace ovms
-}  // namespace mediapipe
+}  // namespace mediapipe::ovms
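
For reference, below is a minimal usage sketch of the adapter after this change (not part of the commit). It assumes the three-argument constructor shown as removed above (servable name, servable version, OVMS_Server*) is still available from the adapter's header; the include path, servable name, and tensor names/shapes are hypothetical, and server configuration and error handling are omitted.

#include <map>
#include <string>

#include <openvino/openvino.hpp>

#include "modelapiovmsadapter.hpp"  // hypothetical include path for OVMSInferenceAdapter

int main() {
    // Passing nullptr lets the adapter create its own OVMS_Server instance.
    mediapipe::ovms::OVMSInferenceAdapter adapter("dummy_servable", 1, nullptr);

    // InferenceInput/InferenceOutput are std::map<std::string, ov::Tensor> aliases (see the diff).
    std::map<std::string, ov::Tensor> input;
    input.emplace("input", ov::Tensor(ov::element::f32, ov::Shape{1, 3, 224, 224}));

    // Allocating overload: output tensors are built from the response buffers.
    auto output = adapter.infer(input);

    // Overload added in this diff: pre-allocated outputs are registered via
    // OVMS_InferenceRequestOutputSetData; any response outputs that were not
    // pre-set are still emplaced into the map.
    std::map<std::string, ov::Tensor> preset;
    preset.emplace("output", ov::Tensor(ov::element::f32, ov::Shape{1, 1000}));
    adapter.infer(input, preset);
    return 0;
}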