Commit aa7f7a4

updat_to_latest
1 parent 80c6074 commit aa7f7a4

2 files changed: +149 −49 lines changed


model_api/cpp/adapters/include/adapters/ovms_adapter.h

Lines changed: 22 additions & 5 deletions
@@ -26,13 +26,15 @@
 #include "adapters/inference_adapter.h"
 #include <openvino/openvino.hpp>
 
+#include "ovms.h"  // NOLINT
+
 // here we need to decide if we have several calculators (1 for OVMS repository, 1-N inside mediapipe)
 // for the one inside OVMS repo it makes sense to reuse code from ovms lib
 
 class OVMS_Server_;
 typedef struct OVMS_Server_ OVMS_Server;
-namespace mediapipe {
-namespace ovms {
+
+namespace mediapipe::ovms {
 
 using InferenceOutput = std::map<std::string, ov::Tensor>;
 using InferenceInput = std::map<std::string, ov::Tensor>;
@@ -47,12 +49,25 @@ class OVMSInferenceAdapter : public ::InferenceAdapter {
     std::vector<std::string> inputNames;
     std::vector<std::string> outputNames;
     shapes_min_max_t inShapesMinMaxes;
+    shapes_min_max_t outShapesMinMaxes;
+    std::unordered_map<std::string, ov::element::Type_t> inputDatatypes;
+    std::unordered_map<std::string, ov::element::Type_t> outputDatatypes;
     ov::AnyMap modelConfig;
 
 public:
-    OVMSInferenceAdapter(const std::string& servableName, uint32_t servableVersion = 0, OVMS_Server* server = nullptr);
+    // TODO Windows: Fix definition in header - does not compile in cpp.
+    OVMSInferenceAdapter(const std::string& servableName, uint32_t servableVersion = 0, OVMS_Server* server = nullptr) :
+        servableName(servableName),
+        servableVersion(servableVersion) {
+        if (nullptr != server) {
+            this->cserver = server;
+        } else {
+            OVMS_ServerNew(&this->cserver);
+        }
+    }
     virtual ~OVMSInferenceAdapter();
     InferenceOutput infer(const InferenceInput& input) override;
+    void infer(const InferenceInput& input, InferenceOutput& output) override;
     void loadModel(const std::shared_ptr<const ov::Model>& model, ov::Core& core,
         const std::string& device, const ov::AnyMap& compilationConfig, size_t max_num_requests = 1) override;
     void inferAsync(const InferenceInput& input, const CallbackData callback_args) override;
@@ -62,9 +77,11 @@ class OVMSInferenceAdapter : public ::InferenceAdapter {
     void awaitAny();
     size_t getNumAsyncExecutors() const;
     ov::PartialShape getInputShape(const std::string& inputName) const override;
+    ov::PartialShape getOutputShape(const std::string& outputName) const override;
+    ov::element::Type_t getInputDatatype(const std::string& inputName) const override;
+    ov::element::Type_t getOutputDatatype(const std::string& outputName) const override;
     std::vector<std::string> getInputNames() const override;
     std::vector<std::string> getOutputNames() const override;
     const ov::AnyMap& getModelConfig() const override;
 };
-} // namespace ovms
-} // namespace mediapipe
+} // namespace mediapipe::ovms
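
Note (not part of this commit): the reworked header adds output-side introspection (getOutputShape, getOutputDatatype) and an infer overload that fills caller-provided output tensors instead of allocating new ones. The sketch below shows how a caller might use the extended interface; it assumes the adapter's backing OVMS server is already serving the model, that the model has a single input and a single output, and that the shapes are static. The include path and helper name are illustrative only.

#include <string>

#include <openvino/openvino.hpp>

#include "adapters/ovms_adapter.h"  // assumed include path for the header above

// Sketch: run one inference with a pre-allocated output tensor, so OVMS writes
// into the caller-owned buffer registered via the new infer(input, output) overload.
ov::Tensor inferWithPreallocatedOutput(mediapipe::ovms::OVMSInferenceAdapter& adapter,
                                       const ov::Tensor& inputTensor) {
    const std::string inputName = adapter.getInputNames().front();
    const std::string outputName = adapter.getOutputNames().front();

    // getOutputShape() returns an ov::PartialShape built from the min/max borders
    // reported by OVMS; get_shape() only succeeds when it is fully static.
    ov::Tensor outputTensor(adapter.getOutputDatatype(outputName),
                            adapter.getOutputShape(outputName).get_shape());

    mediapipe::ovms::InferenceInput input{{inputName, inputTensor}};
    mediapipe::ovms::InferenceOutput output{{outputName, outputTensor}};
    adapter.infer(input, output);  // fills the pre-registered output buffer
    return output.at(outputName);
}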

model_api/cpp/adapters/src/ovms_adapter.cpp

Lines changed: 127 additions & 44 deletions
@@ -32,19 +32,15 @@
 #pragma GCC diagnostic pop
 // here we need to decide if we have several calculators (1 for OVMS repository, 1-N inside mediapipe)
 // for the one inside OVMS repo it makes sense to reuse code from ovms lib
-namespace mediapipe {
-
+namespace mediapipe::ovms {
+
 using std::endl;
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+using InferenceOutput = std::map<std::string, ov::Tensor>;
+using InferenceInput = std::map<std::string, ov::Tensor>;
 
-
-#define THROW_IF_CIRCULAR_ERR(C_API_CALL) \
-    { \
-        auto* fatalErr = C_API_CALL; \
-        if (fatalErr != nullptr) { \
-            std::runtime_error exc("Getting status details circular error"); \
-            throw exc; \
-        } \
-    }
+#define THROW_IF_CIRCULAR_ERR(C_API_CALL) { auto* fatalErr = C_API_CALL;if (fatalErr != nullptr) {std::runtime_error exc("Getting status details circular error");throw exc; } }
 
 #define ASSERT_CAPI_STATUS_NULL(C_API_CALL) \
     { \
@@ -59,58 +55,115 @@ using std::endl;
             throw exc; \
         } \
     }
+
 #define CREATE_GUARD(GUARD_NAME, CAPI_TYPE, CAPI_PTR) \
     std::unique_ptr<CAPI_TYPE, decltype(&(CAPI_TYPE##Delete))> GUARD_NAME(CAPI_PTR, &(CAPI_TYPE##Delete));
 
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-function"
-using InferenceOutput = std::map<std::string, ov::Tensor>;
-using InferenceInput = std::map<std::string, ov::Tensor>;
-
-namespace ovms {
 static OVMS_DataType OVPrecision2CAPI(ov::element::Type_t datatype);
 static ov::element::Type_t CAPI2OVPrecision(OVMS_DataType datatype);
 static ov::Tensor makeOvTensor(OVMS_DataType datatype, const int64_t* shape, size_t dimCount, const void* voutputData, size_t bytesize);
 
-OVMSInferenceAdapter::OVMSInferenceAdapter(const std::string& servableName, uint32_t servableVersion, OVMS_Server* cserver) :
-    servableName(servableName),
-    servableVersion(servableVersion) {
-    if (nullptr != cserver) {
-        this->cserver = cserver;
-    } else {
-        OVMS_ServerNew(&this->cserver);
-    }
+OVMSInferenceAdapter::~OVMSInferenceAdapter() {
+    // LOG(INFO) << "OVMSAdapter destr";
 }
 
-OVMSInferenceAdapter::~OVMSInferenceAdapter() {
-    //LOG(INFO) << "OVMSAdapter destr";
+inline std::vector<int64_t> getShapeAcceptableByCAPI(const ov::Shape& shape) {
+    if (std::any_of(shape.begin(), shape.end(), [](size_t dim) {
+            return dim > std::numeric_limits<int64_t>::max();})) {
+        throw std::runtime_error("Cannot use C-API with dimension size greater than int64_t max value");
+    }
+    return std::vector<int64_t>{shape.begin(), shape.end()};
 }
 
-InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
+void OVMSInferenceAdapter::infer(const InferenceInput& input, InferenceOutput& output) {
     /////////////////////
     // PREPARE REQUEST
     /////////////////////
     OVMS_InferenceRequest* request{nullptr};
     ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestNew(&request, cserver, servableName.c_str(), servableVersion));
     CREATE_GUARD(requestGuard, OVMS_InferenceRequest, request);
-
-    InferenceOutput output;
+
     OVMS_Status* status{nullptr};
+    std::vector<std::string> outputsSet;
     // PREPARE EACH INPUT
     // extract single tensor
     for (const auto& [name, input_tensor] : input) {
-        const char* realInputName = name.c_str();
-        const auto& ovinputShape = input_tensor.get_shape();
-        if (std::any_of(ovinputShape.begin(), ovinputShape.end(), [](size_t dim) {
-                return dim > std::numeric_limits<int64_t>::max();})) {
-            throw std::runtime_error("Cannot use C-API with dimension size greater than int64_t max value");
+        const char* realName = name.c_str();
+        const auto& ovShape = input_tensor.get_shape();
+        std::vector<int64_t> capiShape = getShapeAcceptableByCAPI(ovShape);
+        OVMS_DataType inputDataType = OVPrecision2CAPI(input_tensor.get_element_type());
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, realName, inputDataType, capiShape.data(), capiShape.size()));
+        const uint32_t NOT_USED_NUM = 0;
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestInputSetData(request,
+            realName,
+            reinterpret_cast<void*>(input_tensor.data()),
+            input_tensor.get_byte_size(),
+            OVMS_BUFFERTYPE_CPU,
+            NOT_USED_NUM));
+    }
+    for (const auto& [name, output_tensor] : output) {
+        outputsSet.emplace_back(name);
+        const char* realName = name.c_str();
+        const auto& ovShape = output_tensor.get_shape();
+        std::vector<int64_t> capiShape = getShapeAcceptableByCAPI(ovShape);
+        OVMS_DataType inputDataType = OVPrecision2CAPI(output_tensor.get_element_type());
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddOutput(request, realName, inputDataType, capiShape.data(), capiShape.size()));
+        const uint32_t NOT_USED_NUM = 0;
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestOutputSetData(request,
+            realName,
+            reinterpret_cast<void*>(output_tensor.data()),
+            output_tensor.get_byte_size(),
+            OVMS_BUFFERTYPE_CPU,
+            NOT_USED_NUM));
+
+    }
+
+    //////////////////
+    // INFERENCE
+    //////////////////
+    OVMS_InferenceResponse* response = nullptr;
+    ASSERT_CAPI_STATUS_NULL(OVMS_Inference(cserver, request, &response));
+    CREATE_GUARD(responseGuard, OVMS_InferenceResponse, response);
+    uint32_t outputCount = 42;
+    ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseOutputCount(response, &outputCount));
+    uint32_t parameterCount = 42;
+    ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseParameterCount(response, &parameterCount));
+    const void* voutputData;
+    size_t bytesize = 42;
+    OVMS_DataType datatype = (OVMS_DataType)199;
+    const int64_t* shape{nullptr};
+    size_t dimCount = 42;
+    OVMS_BufferType bufferType = (OVMS_BufferType)199;
+    uint32_t deviceId = 42;
+    const char* outputName{nullptr};
+    for (size_t i = 0; i < outputCount; ++i) {
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseOutput(response, i, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId));
+        if (std::find(outputsSet.begin(), outputsSet.end(), outputName) == outputsSet.end()) {
+            output.emplace(outputName, std::move(makeOvTensor(datatype, shape, dimCount, voutputData, bytesize)));
         }
-        std::vector<int64_t> inputShape{ovinputShape.begin(), ovinputShape.end()};
+    }
+
+}
+InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
+    /////////////////////
+    // PREPARE REQUEST
+    /////////////////////
+    OVMS_InferenceRequest* request{nullptr};
+    ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestNew(&request, cserver, servableName.c_str(), servableVersion));
+    CREATE_GUARD(requestGuard, OVMS_InferenceRequest, request);
+
+    InferenceOutput output;
+    OVMS_Status* status{nullptr};
+    // PREPARE EACH INPUT
+    for (const auto& [name, input_tensor] : input) {
+        const char* realName = name.c_str();
+        const auto& ovShape = input_tensor.get_shape();
+        std::vector<int64_t> capiShape = getShapeAcceptableByCAPI(ovShape);
         OVMS_DataType inputDataType = OVPrecision2CAPI(input_tensor.get_element_type());
-        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, realInputName, inputDataType, inputShape.data(), inputShape.size()));
+        ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, realName, inputDataType, capiShape.data(), capiShape.size()));
         const uint32_t NOT_USED_NUM = 0;
         ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestInputSetData(request,
-            realInputName,
+            realName,
             reinterpret_cast<void*>(input_tensor.data()),
             input_tensor.get_byte_size(),
             OVMS_BUFFERTYPE_CPU,
@@ -130,7 +183,7 @@ InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
         std::stringstream ss;
         ss << "Inference in OVMSAdapter failed: ";
         ss << msg << " code: " << code;
-        //LOG(INFO) << ss.str();
+        // LOG(INFO) << ss.str();
         OVMS_StatusDelete(status);
         return output;
     }
@@ -149,7 +202,7 @@ InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
     const char* outputName{nullptr};
     for (size_t i = 0; i < outputCount; ++i) {
         ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseOutput(response, i, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId));
-        output[outputName] = makeOvTensor(datatype, shape, dimCount, voutputData, bytesize);
+        output.emplace(outputName, std::move(makeOvTensor(datatype, shape, dimCount, voutputData, bytesize)));
     }
 
     return output;
@@ -181,10 +234,18 @@ void OVMSInferenceAdapter::loadModel(const std::shared_ptr<const ov::Model>& mod
             inputMinMax.second.emplace_back(shapeMax[i]);
         }
         this->inShapesMinMaxes.insert({tensorName, std::move(inputMinMax)});
+        this->inputDatatypes.insert({tensorName, CAPI2OVPrecision(datatype)});
     }
     for (id = 0; id < outputCount; ++id) {
         ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataOutput(servableMetadata, id, &tensorName, &datatype, &dimCount, &shapeMin, &shapeMax));
         outputNames.emplace_back(tensorName);
+        shape_min_max_t outputMinMax;
+        for (size_t i = 0; i < dimCount; ++i) {
+            outputMinMax.first.emplace_back(shapeMin[i]);
+            outputMinMax.second.emplace_back(shapeMax[i]);
+        }
+        this->outShapesMinMaxes.insert({tensorName, std::move(outputMinMax)});
+        this->outputDatatypes.insert({tensorName, CAPI2OVPrecision(datatype)});
     }
     const ov::AnyMap* servableMetadataRtInfo;
     ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataInfo(servableMetadata, reinterpret_cast<const void**>(&servableMetadataRtInfo)));
@@ -195,15 +256,23 @@ void OVMSInferenceAdapter::loadModel(const std::shared_ptr<const ov::Model>& mod
             this->modelConfig = (*servableMetadataRtInfo).at("model_info").as<ov::AnyMap>();
         }
     } catch (const std::exception& e) {
-        //LOG(INFO) << "Exception occurred while accessing model_info: " << e.what();
+        // LOG(INFO) << "Exception occurred while accessing model_info: " << e.what();
         this->modelConfig = ov::AnyMap{};
     }
 }
 
+ov::element::Type_t OVMSInferenceAdapter::getInputDatatype(const std::string& inputName) const {
+    return inputDatatypes.at(inputName);
+}
+
+ov::element::Type_t OVMSInferenceAdapter::getOutputDatatype(const std::string& outputName) const {
+    return outputDatatypes.at(outputName);
+}
+
 ov::PartialShape OVMSInferenceAdapter::getInputShape(const std::string& inputName) const {
     auto it = inShapesMinMaxes.find(inputName);
     if (it == inShapesMinMaxes.end()) {
-        //LOG(INFO) << "Could not find input:" << inputName;
+        // LOG(INFO) << "Could not find input:" << inputName;
         throw std::runtime_error(std::string("Adapter could not find input:") + inputName);
     }
 
@@ -215,6 +284,21 @@ ov::PartialShape OVMSInferenceAdapter::getInputShape(const std::string& inputNam
     }
     return ovShape;
 }
+ov::PartialShape OVMSInferenceAdapter::getOutputShape(const std::string& outputName) const {
+    auto it = outShapesMinMaxes.find(outputName);
+    if (it == outShapesMinMaxes.end()) {
+        // LOG(INFO) << "Could not find output:" << outputName;
+        throw std::runtime_error(std::string("Adapter could not find output:") + outputName);
+    }
+
+    ov::PartialShape ovShape;
+    const auto& [minBorder, maxBorder] = it->second;
+    ovShape.reserve(minBorder.size());
+    for (size_t i = 0; i < minBorder.size(); ++i) {
+        ovShape.emplace_back(ov::Dimension{minBorder[i], maxBorder[i]});
+    }
+    return ovShape;
+}
 
 std::vector<std::string> OVMSInferenceAdapter::getInputNames() const { return inputNames; }
 
@@ -328,5 +412,4 @@ static ov::Tensor makeOvTensor(OVMS_DataType datatype, const int64_t* shape, siz
 }
 
 #pragma GCC diagnostic pop
-} // namespace ovms
-} // namespace mediapipe
+} // namespace mediapipe::ovms
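
Note (not part of this commit): loadModel now records output min/max shape borders and element types, and getOutputShape exposes them as an interval-bounded ov::PartialShape (one ov::Dimension{min, max} per axis). The helper below is a hedged sketch of how a caller might collapse such a partial shape into a concrete ov::Shape for tensor allocation; the fallback policy (lower bound, then 1) is an assumption of this example, not adapter behavior, and the helper name is illustrative only.

#include <openvino/openvino.hpp>

// Resolve an interval-bounded shape (as returned by getOutputShape) into a
// static ov::Shape usable for allocating a tensor.
inline ov::Shape resolveForAllocation(const ov::PartialShape& pshape) {
    ov::Shape result;
    for (const auto& dim : pshape) {
        if (dim.is_static()) {
            result.push_back(dim.get_length());      // exact size known
        } else if (dim.get_min_length() > 0) {
            result.push_back(dim.get_min_length());  // use the server-reported lower bound
        } else {
            result.push_back(1);                     // fully dynamic axis: assume batch-like, pick 1
        }
    }
    return result;
}

For example, output borders of {1,1} {3,3} {224,224} {224,224} resolve to an ov::Shape of {1, 3, 224, 224}, while a dynamic batch axis with borders {1, 512} resolves to 1 under this policy.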
