diff --git a/packages/react-native-executorch/common/rnexecutorch/data_processing/CVProcessing.cpp b/packages/react-native-executorch/common/rnexecutorch/data_processing/CVProcessing.cpp new file mode 100644 index 0000000000..cbb9817b8c --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/data_processing/CVProcessing.cpp @@ -0,0 +1,57 @@ +#include "CVProcessing.h" +#include +#include +#include +#include +#include + +namespace rnexecutorch::cv_processing { + +float computeIoU(const BBox &a, const BBox &b) { + float x1 = std::max(a.x1, b.x1); + float y1 = std::max(a.y1, b.y1); + float x2 = std::min(a.x2, b.x2); + float y2 = std::min(a.y2, b.y2); + + float intersectionArea = std::max(0.0f, x2 - x1) * std::max(0.0f, y2 - y1); + float areaA = a.area(); + float areaB = b.area(); + float unionArea = areaA + areaB - intersectionArea; + + return (unionArea > 0.0f) ? (intersectionArea / unionArea) : 0.0f; +} + +std::optional validateNormParam(const std::vector &values, + const char *paramName) { + if (values.size() == 3) { + return cv::Scalar(values[0], values[1], values[2]); + } else if (!values.empty()) { + log(LOG_LEVEL::Warn, + std::string(paramName) + + " must have 3 elements — ignoring provided value."); + } + return std::nullopt; +} + +std::set +prepareAllowedClasses(const std::vector &classIndices) { + std::set allowedClasses; + if (!classIndices.empty()) { + allowedClasses.insert(classIndices.begin(), classIndices.end()); + } + return allowedClasses; +} + +void validateThresholds(double confidenceThreshold, double iouThreshold) { + if (confidenceThreshold < 0.0 || confidenceThreshold > 1.0) { + throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig, + "Confidence threshold must be in range [0, 1]."); + } + + if (iouThreshold < 0.0 || iouThreshold > 1.0) { + throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig, + "IoU threshold must be in range [0, 1]."); + } +} + +} // namespace rnexecutorch::cv_processing diff --git 
a/packages/react-native-executorch/common/rnexecutorch/data_processing/CVProcessing.h b/packages/react-native-executorch/common/rnexecutorch/data_processing/CVProcessing.h new file mode 100644 index 0000000000..091631a779 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/data_processing/CVProcessing.h @@ -0,0 +1,107 @@ +#pragma once + +#include "CVTypes.h" +#include +#include +#include +#include +#include + +namespace rnexecutorch::cv_processing { + +/** + * @brief Compute Intersection over Union (IoU) between two bounding boxes + * @param a First bounding box + * @param b Second bounding box + * @return IoU value between 0.0 and 1.0 + * + * Moved from utils/computer_vision/Processing.h for consolidation. + */ +float computeIoU(const BBox &a, const BBox &b); + +/** + * @brief Non-Maximum Suppression for detection/segmentation results + * @tparam T Type that has bbox and score fields (satisfies HasBBoxAndScore) + * @param items Vector of items to filter + * @param iouThreshold IoU threshold for suppression (typically 0.5) + * @return Filtered vector with overlapping detections removed + * + * Moved from utils/computer_vision/Processing.h for consolidation. + * Handles both class-aware and class-agnostic NMS automatically. 
+ */ +template +std::vector nonMaxSuppression(std::vector items, double iouThreshold) { + if (items.empty()) { + return {}; + } + + // Sort by score in descending order + std::ranges::sort(items, + [](const T &a, const T &b) { return a.score > b.score; }); + + std::vector result; + std::vector suppressed(items.size(), false); + + for (size_t i = 0; i < items.size(); ++i) { + if (suppressed[i]) { + continue; + } + + result.push_back(items[i]); + + // Suppress overlapping boxes + for (size_t j = i + 1; j < items.size(); ++j) { + if (suppressed[j]) { + continue; + } + + // If type has classIndex, only suppress boxes of same class + if constexpr (requires(T t) { t.classIndex; }) { + if (items[i].classIndex != items[j].classIndex) { + continue; + } + } + + float iou = computeIoU(items[i].bbox, items[j].bbox); + if (iou > iouThreshold) { + suppressed[j] = true; + } + } + } + + return result; +} + +/** + * @brief Validate and convert normalization parameter vector to cv::Scalar + * @param values Vector of normalization values (should have 3 elements for RGB) + * @param paramName Parameter name for logging (e.g., "normMean", "normStd") + * @return Optional cv::Scalar if valid (3 elements), nullopt otherwise + * + * Replaces duplicate validation logic across ObjectDetection, + * BaseInstanceSegmentation, and BaseSemanticSegmentation. + */ +std::optional validateNormParam(const std::vector &values, + const char *paramName); + +/** + * @brief Convert class indices vector to a set for efficient filtering + * @param classIndices Vector of class indices to allow + * @return Set of allowed class indices (empty set = allow all classes) + * + * Used by detection and segmentation models to filter results by class. 
+ */ +std::set +prepareAllowedClasses(const std::vector &classIndices); + +/** + * @brief Validate confidence and IoU thresholds are in valid range [0, 1] + * @param confidenceThreshold Detection confidence threshold + * @param iouThreshold Non-maximum suppression IoU threshold + * @throws RnExecutorchError if either threshold is out of range + * + * Used by detection and segmentation models to validate user input. + */ +void validateThresholds(double confidenceThreshold, double iouThreshold); + +} // namespace rnexecutorch::cv_processing diff --git a/packages/react-native-executorch/common/rnexecutorch/data_processing/CVTypes.h b/packages/react-native-executorch/common/rnexecutorch/data_processing/CVTypes.h new file mode 100644 index 0000000000..4a146d2180 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/data_processing/CVTypes.h @@ -0,0 +1,64 @@ +#pragma once + +#include +#include + +namespace rnexecutorch::cv_processing { + +/** + * @brief Bounding box representation with x1, y1, x2, y2 coordinates + * + * Moved from utils/computer_vision/Types.h for consolidation. + */ +struct BBox { + float x1, y1, x2, y2; + + float width() const { return x2 - x1; } + + float height() const { return y2 - y1; } + + float area() const { return width() * height(); } + + bool isValid() const { + return x2 > x1 && y2 > y1 && x1 >= 0.0f && y1 >= 0.0f; + } + + BBox scale(float widthRatio, float heightRatio) const { + return {x1 * widthRatio, y1 * heightRatio, x2 * widthRatio, + y2 * heightRatio}; + } +}; + +/** + * @brief Concept for types that have a bounding box and confidence score + * + * Used for NMS and other detection/segmentation operations. + */ +template +concept HasBBoxAndScore = requires(T t) { + { t.bbox } -> std::convertible_to; + { t.score } -> std::convertible_to; +}; + +/** + * @brief Scale ratios for mapping between original and model input dimensions + * + * Replaces duplicate scale ratio calculation code across multiple models. 
+ */ +struct ScaleRatios { + float widthRatio; + float heightRatio; + + /** + * @brief Compute scale ratios from original size to model input size + * @param original Original image dimensions + * @param model Model input dimensions + * @return ScaleRatios struct containing width and height ratios + */ + static ScaleRatios compute(cv::Size original, cv::Size model) { + return {static_cast(original.width) / model.width, + static_cast(original.height) / model.height}; + } +}; + +} // namespace rnexecutorch::cv_processing diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index 7b389d45b6..f81077a572 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -25,7 +26,6 @@ #include #include #include -#include using namespace rnexecutorch::models::speech_to_text; @@ -433,7 +433,7 @@ getJsiValue(const std::unordered_map &map, return mapObj; } -inline jsi::Value getJsiValue(const utils::computer_vision::BBox &bbox, +inline jsi::Value getJsiValue(const cv_processing::BBox &bbox, jsi::Runtime &runtime) { jsi::Object obj(runtime); obj.setProperty(runtime, "x1", bbox.x1); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index cc9c862b32..b084de9ab7 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -1,6 +1,7 @@ #include "VisionModel.h" #include #include +#include #include #include @@ -18,6 +19,18 @@ void VisionModel::unload() noexcept { } cv::Size VisionModel::modelInputSize() const { + // For multi-method 
models, query the currently loaded method's input shape + if (!currentlyLoadedMethod_.empty()) { + auto inputShapes = getAllInputShapes(currentlyLoadedMethod_); + if (!inputShapes.empty() && !inputShapes[0].empty() && + inputShapes[0].size() >= 2) { + const auto &shape = inputShapes[0]; + return {static_cast(shape[shape.size() - 2]), + static_cast(shape[shape.size() - 1])}; + } + } + + // Default: use cached modelInputShape_ from single-method models if (modelInputShape_.size() < 2) { return {0, 0}; } @@ -51,4 +64,42 @@ cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { return ::rnexecutorch::utils::pixelsToMat(tensorView); } +void VisionModel::ensureMethodLoaded(const std::string &methodName) { + if (methodName.empty()) { + throw RnExecutorchError( + RnExecutorchErrorCode::InvalidConfig, + "Method name cannot be empty. Use 'forward' for single-method models " + "or 'forward_{inputSize}' for multi-method models."); + } + + if (currentlyLoadedMethod_ == methodName) { + return; + } + + if (!module_) { + throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded, + "Model not loaded. Cannot load method '" + + methodName + "'."); + } + + if (!currentlyLoadedMethod_.empty()) { + module_->unload_method(currentlyLoadedMethod_); + } + + auto loadResult = module_->load_method(methodName); + if (loadResult != executorch::runtime::Error::Ok) { + throw RnExecutorchError( + loadResult, "Failed to load method '" + methodName + + "'. 
Ensure the method exists in the exported model."); + } + + currentlyLoadedMethod_ = methodName; +} + +void VisionModel::initializeNormalization(const std::vector &normMean, + const std::vector &normStd) { + normMean_ = cv_processing::validateNormParam(normMean, "normMean"); + normStd_ = cv_processing::validateNormParam(normStd, "normStd"); +} + } // namespace rnexecutorch::models diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h index cf003948af..cdfe2c1ab6 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -78,6 +79,42 @@ class VisionModel : public BaseModel { */ mutable std::mutex inference_mutex_; + /// Name of the currently loaded method (for multi-method models). + /// Empty for single-method models using default "forward". + std::string currentlyLoadedMethod_; + + /// Optional per-channel mean for input normalisation. + std::optional normMean_; + + /// Optional per-channel standard deviation for input normalisation. + std::optional normStd_; + + /** + * @brief Ensures the specified method is loaded, unloading any previous + * method if necessary. + * + * For single-method models, pass "forward" (the default). + * For multi-method models, pass the specific method name (e.g., + * "forward_384"). + * + * @param methodName Name of the method to load. Defaults to "forward". + * @throws RnExecutorchError if the method cannot be loaded. + */ + void ensureMethodLoaded(const std::string &methodName = "forward"); + + /** + * @brief Initializes normalization parameters from vectors. + * + * Uses cv_processing::validateNormParam() for validation. + * + * @param normMean Per-channel mean values (must be exactly 3 elements, or + * empty to skip). 
+ * @param normStd Per-channel std dev values (must be exactly 3 elements, or + * empty to skip). + */ + void initializeNormalization(const std::vector &normMean, + const std::vector &normStd); + /** * @brief Resize an RGB image to the model's expected input size * diff --git a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp index 3d2f9d1715..85ffc65152 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.cpp @@ -4,11 +4,10 @@ #include #include #include -#include +#include #include #include #include -#include namespace rnexecutorch::models::instance_segmentation { @@ -17,31 +16,7 @@ BaseInstanceSegmentation::BaseInstanceSegmentation( std::vector normStd, bool applyNMS, std::shared_ptr callInvoker) : VisionModel(modelSource, callInvoker), applyNMS_(applyNMS) { - - if (normMean.size() == 3) { - normMean_ = cv::Scalar(normMean[0], normMean[1], normMean[2]); - } else if (!normMean.empty()) { - log(LOG_LEVEL::Warn, - "normMean must have 3 elements — ignoring provided value."); - } - if (normStd.size() == 3) { - normStd_ = cv::Scalar(normStd[0], normStd[1], normStd[2]); - } else if (!normStd.empty()) { - log(LOG_LEVEL::Warn, - "normStd must have 3 elements — ignoring provided value."); - } -} - -cv::Size BaseInstanceSegmentation::modelInputSize() const { - if (currentlyLoadedMethod_.empty()) { - return VisionModel::modelInputSize(); - } - auto inputShapes = getAllInputShapes(currentlyLoadedMethod_); - if (inputShapes.empty() || inputShapes[0].size() < 2) { - return VisionModel::modelInputSize(); - } - const auto &shape = inputShapes[0]; - return {shape[shape.size() - 2], shape[shape.size() - 1]}; + 
initializeNormalization(normMean, normStd); } TensorPtr BaseInstanceSegmentation::buildInputTensor(const cv::Mat &image) { @@ -75,7 +50,7 @@ std::vector BaseInstanceSegmentation::runInference( cv::Size modelInputSize(shape[shape.size() - 2], shape[shape.size() - 1]); cv::Size originalSize(image.cols, image.rows); - validateThresholds(confidenceThreshold, iouThreshold); + cv_processing::validateThresholds(confidenceThreshold, iouThreshold); auto forwardResult = BaseModel::execute(methodName, {buildInputTensor(image)}); @@ -144,13 +119,12 @@ std::vector BaseInstanceSegmentation::generateFromPixels( classIndices, returnMaskAtOriginalResolution, methodName); } -std::tuple +std::tuple BaseInstanceSegmentation::extractDetectionData(const float *bboxData, const float *scoresData, int32_t index) { - utils::computer_vision::BBox bbox{ - bboxData[index * 4], bboxData[index * 4 + 1], bboxData[index * 4 + 2], - bboxData[index * 4 + 3]}; + cv_processing::BBox bbox{bboxData[index * 4], bboxData[index * 4 + 1], + bboxData[index * 4 + 2], bboxData[index * 4 + 3]}; float score = scoresData[index * 2]; int32_t label = static_cast(scoresData[index * 2 + 1]); @@ -158,7 +132,7 @@ BaseInstanceSegmentation::extractDetectionData(const float *bboxData, } cv::Rect BaseInstanceSegmentation::computeMaskCropRect( - const utils::computer_vision::BBox &bboxModel, cv::Size modelInputSize, + const cv_processing::BBox &bboxModel, cv::Size modelInputSize, cv::Size maskSize) { float mx1F = bboxModel.x1 * maskSize.width / modelInputSize.width; @@ -187,7 +161,7 @@ cv::Rect BaseInstanceSegmentation::addPaddingToRect(const cv::Rect &rect, cv::Mat BaseInstanceSegmentation::warpToOriginalResolution( const cv::Mat &probMat, const cv::Rect &maskRect, cv::Size originalSize, - cv::Size maskSize, const utils::computer_vision::BBox &bboxOriginal) { + cv::Size maskSize, const cv_processing::BBox &bboxOriginal) { float scaleX = static_cast(originalSize.width) / maskSize.width; float scaleY = 
static_cast(originalSize.height) / maskSize.height; @@ -211,8 +185,8 @@ cv::Mat BaseInstanceSegmentation::thresholdToBinary(const cv::Mat &probMat) { } cv::Mat BaseInstanceSegmentation::processMaskFromLogits( - const cv::Mat &logitsMat, const utils::computer_vision::BBox &bboxModel, - const utils::computer_vision::BBox &bboxOriginal, cv::Size modelInputSize, + const cv::Mat &logitsMat, const cv_processing::BBox &bboxModel, + const cv_processing::BBox &bboxOriginal, cv::Size modelInputSize, cv::Size originalSize, bool warpToOriginal) { cv::Size maskSize = logitsMat.size(); @@ -232,22 +206,6 @@ cv::Mat BaseInstanceSegmentation::processMaskFromLogits( return thresholdToBinary(probMat); } -void BaseInstanceSegmentation::validateThresholds(double confidenceThreshold, - double iouThreshold) const { - if (confidenceThreshold < 0 || confidenceThreshold > 1) { - throw RnExecutorchError( - RnExecutorchErrorCode::InvalidConfig, - "Confidence threshold must be greater or equal to 0 " - "and less than or equal to 1."); - } - - if (iouThreshold < 0 || iouThreshold > 1) { - throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig, - "IoU threshold must be greater or equal to 0 " - "and less than or equal to 1."); - } -} - void BaseInstanceSegmentation::validateOutputTensors( const std::vector &tensors) const { if (tensors.size() != 3) { @@ -258,55 +216,12 @@ void BaseInstanceSegmentation::validateOutputTensors( } } -std::set BaseInstanceSegmentation::prepareAllowedClasses( - const std::vector &classIndices) const { - std::set allowedClasses; - if (!classIndices.empty()) { - allowedClasses.insert(classIndices.begin(), classIndices.end()); - } - return allowedClasses; -} - -void BaseInstanceSegmentation::ensureMethodLoaded( - const std::string &methodName) { - if (methodName.empty()) { - throw RnExecutorchError( - RnExecutorchErrorCode::InvalidConfig, - "Method name cannot be empty. 
Use 'forward' for single-method models " - "or 'forward_{inputSize}' for multi-method models."); - } - - if (currentlyLoadedMethod_ == methodName) { - return; - } - - if (!module_) { - throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded, - "Model not loaded. Cannot load method '" + - methodName + "'."); - } - - if (!currentlyLoadedMethod_.empty()) { - module_->unload_method(currentlyLoadedMethod_); - } - - auto loadResult = module_->load_method(methodName); - if (loadResult != executorch::runtime::Error::Ok) { - throw RnExecutorchError( - loadResult, "Failed to load method '" + methodName + - "'. Ensure the method exists in the exported model."); - } - - currentlyLoadedMethod_ = methodName; -} - std::vector BaseInstanceSegmentation::finalizeInstances( std::vector instances, double iouThreshold, int32_t maxInstances) const { if (applyNMS_) { - instances = - utils::computer_vision::nonMaxSuppression(instances, iouThreshold); + instances = cv_processing::nonMaxSuppression(instances, iouThreshold); } if (std::cmp_greater(instances.size(), maxInstances)) { @@ -326,7 +241,7 @@ std::vector BaseInstanceSegmentation::collectInstances( static_cast(originalSize.width) / modelInputSize.width; float heightRatio = static_cast(originalSize.height) / modelInputSize.height; - auto allowedClasses = prepareAllowedClasses(classIndices); + auto allowedClasses = cv_processing::prepareAllowedClasses(classIndices); // CONTRACT auto bboxTensor = tensors[0].toTensor(); // [1, N, 4] @@ -357,8 +272,7 @@ std::vector BaseInstanceSegmentation::collectInstances( continue; } - utils::computer_vision::BBox bboxOriginal = - bboxModel.scale(widthRatio, heightRatio); + cv_processing::BBox bboxOriginal = bboxModel.scale(widthRatio, heightRatio); if (!bboxOriginal.isValid()) { continue; } diff --git a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h 
b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h index 341d0f2235..d59400e5fa 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/BaseInstanceSegmentation.h @@ -3,13 +3,11 @@ #include #include #include -#include -#include #include "Types.h" #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" +#include #include -#include namespace rnexecutorch { namespace models::instance_segmentation { @@ -44,9 +42,6 @@ class BaseInstanceSegmentation : public VisionModel { bool returnMaskAtOriginalResolution, std::string methodName); -protected: - cv::Size modelInputSize() const override; - private: std::vector runInference( const cv::Mat &image, double confidenceThreshold, double iouThreshold, @@ -61,29 +56,21 @@ class BaseInstanceSegmentation : public VisionModel { const std::vector &classIndices, bool returnMaskAtOriginalResolution); - void validateThresholds(double confidenceThreshold, - double iouThreshold) const; void validateOutputTensors(const std::vector &tensors) const; - std::set - prepareAllowedClasses(const std::vector &classIndices) const; - - // Model loading and input helpers - void ensureMethodLoaded(const std::string &methodName); - - std::tuple + std::tuple extractDetectionData(const float *bboxData, const float *scoresData, int32_t index); - cv::Rect computeMaskCropRect(const utils::computer_vision::BBox &bboxModel, + cv::Rect computeMaskCropRect(const cv_processing::BBox &bboxModel, cv::Size modelInputSize, cv::Size maskSize); cv::Rect addPaddingToRect(const cv::Rect &rect, cv::Size maskSize); - cv::Mat - warpToOriginalResolution(const cv::Mat &probMat, const cv::Rect &maskRect, - cv::Size originalSize, cv::Size maskSize, - const utils::computer_vision::BBox &bboxOriginal); + cv::Mat warpToOriginalResolution(const cv::Mat &probMat, + 
const cv::Rect &maskRect, + cv::Size originalSize, cv::Size maskSize, + const cv_processing::BBox &bboxOriginal); cv::Mat thresholdToBinary(const cv::Mat &probMat); @@ -91,15 +78,13 @@ class BaseInstanceSegmentation : public VisionModel { finalizeInstances(std::vector instances, double iouThreshold, int32_t maxInstances) const; - cv::Mat processMaskFromLogits( - const cv::Mat &logitsMat, const utils::computer_vision::BBox &bboxModel, - const utils::computer_vision::BBox &bboxOriginal, cv::Size modelInputSize, - cv::Size originalSize, bool warpToOriginal); + cv::Mat processMaskFromLogits(const cv::Mat &logitsMat, + const cv_processing::BBox &bboxModel, + const cv_processing::BBox &bboxOriginal, + cv::Size modelInputSize, cv::Size originalSize, + bool warpToOriginal); - std::optional normMean_; - std::optional normStd_; bool applyNMS_; - std::string currentlyLoadedMethod_; }; } // namespace models::instance_segmentation diff --git a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/Types.h b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/Types.h index 9006688ce1..7fabaeea69 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/Types.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/instance_segmentation/Types.h @@ -2,8 +2,8 @@ #include #include +#include #include -#include namespace rnexecutorch::models::instance_segmentation::types { @@ -16,13 +16,13 @@ namespace rnexecutorch::models::instance_segmentation::types { struct Instance { Instance() = default; - Instance(utils::computer_vision::BBox bbox, - std::shared_ptr mask, int32_t maskWidth, - int32_t maskHeight, int32_t classIndex, float score) + Instance(cv_processing::BBox bbox, std::shared_ptr mask, + int32_t maskWidth, int32_t maskHeight, int32_t classIndex, + float score) : bbox(bbox), mask(std::move(mask)), maskWidth(maskWidth), maskHeight(maskHeight), classIndex(classIndex), 
score(score) {} - utils::computer_vision::BBox bbox; + cv_processing::BBox bbox; std::shared_ptr mask; int32_t maskWidth; int32_t maskHeight; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index be1eb539a2..b76d78fa6e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -1,16 +1,13 @@ #include "ObjectDetection.h" #include "Constants.h" -#include - #include #include -#include +#include #include #include #include #include -#include namespace rnexecutorch::models::object_detection { @@ -20,64 +17,7 @@ ObjectDetection::ObjectDetection( std::shared_ptr callInvoker) : VisionModel(modelSource, callInvoker), labelNames_(std::move(labelNames)) { - if (normMean.size() == 3) { - normMean_ = cv::Scalar(normMean[0], normMean[1], normMean[2]); - } else if (!normMean.empty()) { - log(LOG_LEVEL::Warn, - "normMean must have 3 elements — ignoring provided value."); - } - if (normStd.size() == 3) { - normStd_ = cv::Scalar(normStd[0], normStd[1], normStd[2]); - } else if (!normStd.empty()) { - log(LOG_LEVEL::Warn, - "normStd must have 3 elements — ignoring provided value."); - } -} - -cv::Size ObjectDetection::modelInputSize() const { - if (currentlyLoadedMethod_.empty()) { - return VisionModel::modelInputSize(); - } - auto inputShapes = getAllInputShapes(currentlyLoadedMethod_); - if (inputShapes.empty() || inputShapes[0].size() < 2) { - return VisionModel::modelInputSize(); - } - const auto &shape = inputShapes[0]; - return {static_cast(shape[shape.size() - 2]), - static_cast(shape[shape.size() - 1])}; -} - -void ObjectDetection::ensureMethodLoaded(const std::string &methodName) { - if (methodName.empty()) { - throw 
RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, - "methodName cannot be empty"); - } - if (currentlyLoadedMethod_ == methodName) { - return; - } - if (!module_) { - throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded, - "Model module is not loaded"); - } - if (!currentlyLoadedMethod_.empty()) { - module_->unload_method(currentlyLoadedMethod_); - } - auto loadResult = module_->load_method(methodName); - if (loadResult != executorch::runtime::Error::Ok) { - throw RnExecutorchError( - loadResult, "Failed to load method '" + methodName + - "'. Ensure the method exists in the exported model."); - } - currentlyLoadedMethod_ = methodName; -} - -std::set ObjectDetection::prepareAllowedClasses( - const std::vector &classIndices) const { - std::set allowedClasses; - if (!classIndices.empty()) { - allowedClasses.insert(classIndices.begin(), classIndices.end()); - } - return allowedClasses; + initializeNormalization(normMean, normStd); } std::vector @@ -91,7 +31,7 @@ ObjectDetection::postprocess(const std::vector &tensors, static_cast(originalSize.height) / inputSize.height; // Prepare allowed classes set for filtering - auto allowedClasses = prepareAllowedClasses(classIndices); + auto allowedClasses = cv_processing::prepareAllowedClasses(classIndices); std::vector detections; auto bboxTensor = tensors.at(0).toTensor(); @@ -134,24 +74,17 @@ ObjectDetection::postprocess(const std::vector &tensors, " exceeds labelNames size " + std::to_string(labelNames_.size()) + ". 
Ensure the labelMap covers all model output classes."); } - detections.emplace_back(utils::computer_vision::BBox{x1, y1, x2, y2}, + detections.emplace_back(cv_processing::BBox{x1, y1, x2, y2}, labelNames_[labelIdx], labelIdx, scores[i]); } - return utils::computer_vision::nonMaxSuppression(detections, iouThreshold); + return cv_processing::nonMaxSuppression(detections, iouThreshold); } std::vector ObjectDetection::runInference( cv::Mat image, double detectionThreshold, double iouThreshold, const std::vector &classIndices, const std::string &methodName) { - if (detectionThreshold < 0.0 || detectionThreshold > 1.0) { - throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, - "detectionThreshold must be in range [0, 1]"); - } - if (iouThreshold < 0.0 || iouThreshold > 1.0) { - throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, - "iouThreshold must be in range [0, 1]"); - } + cv_processing::validateThresholds(detectionThreshold, iouThreshold); std::scoped_lock lock(inference_mutex_); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h index 6e3c01356e..f52f29e223 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h @@ -86,16 +86,6 @@ class ObjectDetection : public VisionModel { std::string methodName); protected: - /** - * @brief Returns the model input size based on the currently loaded method. - * - * Overrides VisionModel::modelInputSize() to support multi-method models - * where each method may have different input dimensions. - * - * @return The expected input size for the currently loaded method. 
- */ - cv::Size modelInputSize() const override; - std::vector runInference(cv::Mat image, double detectionThreshold, double iouThreshold, const std::vector &classIndices, @@ -125,36 +115,8 @@ class ObjectDetection : public VisionModel { double detectionThreshold, double iouThreshold, const std::vector &classIndices); - /** - * @brief Ensures the specified method is loaded, unloading any previous - * method if necessary. - * - * @param methodName Name of the method to load (e.g., "forward", - * "forward_384"). - * @throws RnExecutorchError if the method cannot be loaded. - */ - void ensureMethodLoaded(const std::string &methodName); - - /** - * @brief Prepares a set of allowed class indices for filtering detections. - * - * @param classIndices Vector of class indices to allow. - * @return A set containing the allowed class indices. - */ - std::set - prepareAllowedClasses(const std::vector &classIndices) const; - - /// Optional per-channel mean for input normalisation (set in constructor). - std::optional normMean_; - - /// Optional per-channel standard deviation for input normalisation. - std::optional normStd_; - /// Ordered label strings mapping class indices to human-readable names. std::vector labelNames_; - - /// Name of the currently loaded method (for multi-method models). 
- std::string currentlyLoadedMethod_; }; } // namespace models::object_detection diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/Types.h b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/Types.h index 1652516e89..2f63aa29a8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/Types.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/Types.h @@ -1,19 +1,19 @@ #pragma once #include -#include +#include #include namespace rnexecutorch::models::object_detection::types { struct Detection { Detection() = default; - Detection(utils::computer_vision::BBox bbox, std::string label, - int32_t classIndex, float score) + Detection(cv_processing::BBox bbox, std::string label, int32_t classIndex, + float score) : bbox(bbox), label(std::move(label)), classIndex(classIndex), score(score) {} - utils::computer_vision::BBox bbox; + cv_processing::BBox bbox; std::string label; int32_t classIndex; float score; diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt index f6fe386a7e..75f579a713 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt +++ b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt @@ -103,6 +103,10 @@ set(IMAGE_UTILS_SOURCES ${COMMON_DIR}/ada/ada.cpp ) +set(CV_PROCESSING_SOURCES + ${RNEXECUTORCH_DIR}/data_processing/CVProcessing.cpp +) + set(TOKENIZER_SOURCES ${RNEXECUTORCH_DIR}/TokenizerModule.cpp) set(DSP_SOURCES ${RNEXECUTORCH_DIR}/data_processing/dsp.cpp) @@ -157,6 +161,12 @@ add_rn_test(ImageProcessingTest unit/ImageProcessingTest.cpp LIBS opencv_deps ) +add_rn_test(CVProcessingTest unit/CVProcessingTest.cpp + SOURCES + ${CV_PROCESSING_SOURCES} + LIBS opencv_deps +) + add_rn_test(FrameProcessorTests unit/FrameProcessorTest.cpp SOURCES 
${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp @@ -179,6 +189,7 @@ add_rn_test(VisionModelTests integration/VisionModelTest.cpp ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${RNEXECUTORCH_DIR}/utils/FrameTransform.cpp + ${CV_PROCESSING_SOURCES} ${IMAGE_UTILS_SOURCES} LIBS opencv_deps android ) @@ -190,6 +201,7 @@ add_rn_test(ClassificationTests integration/ClassificationTest.cpp ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${RNEXECUTORCH_DIR}/utils/FrameTransform.cpp + ${CV_PROCESSING_SOURCES} ${IMAGE_UTILS_SOURCES} LIBS opencv_deps android ) @@ -202,6 +214,6 @@ add_rn_test(ObjectDetectionTests integration/ObjectDetectionTest.cpp ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${RNEXECUTORCH_DIR}/utils/FrameTransform.cpp - ${RNEXECUTORCH_DIR}/utils/computer_vision/Processing.cpp + ${CV_PROCESSING_SOURCES} ${IMAGE_UTILS_SOURCES} LIBS opencv_deps android ) @@ -214,6 +227,7 @@ add_rn_test(ImageEmbeddingsTests integration/ImageEmbeddingsTest.cpp ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${RNEXECUTORCH_DIR}/utils/FrameTransform.cpp + ${CV_PROCESSING_SOURCES} ${IMAGE_UTILS_SOURCES} LIBS opencv_deps android ) @@ -233,6 +247,7 @@ add_rn_test(StyleTransferTests integration/StyleTransferTest.cpp ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${RNEXECUTORCH_DIR}/utils/FrameTransform.cpp + ${CV_PROCESSING_SOURCES} ${IMAGE_UTILS_SOURCES} LIBS opencv_deps android ) @@ -306,7 +321,7 @@ add_rn_test(InstanceSegmentationTests integration/InstanceSegmentationTest.cpp ${RNEXECUTORCH_DIR}/models/VisionModel.cpp ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp - ${RNEXECUTORCH_DIR}/utils/computer_vision/Processing.cpp + ${CV_PROCESSING_SOURCES} ${IMAGE_UTILS_SOURCES} LIBS opencv_deps android ) diff --git
a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp index de36b3c545..553f4e61e6 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp @@ -50,7 +50,8 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - (void)model.generateFromString(kValidTestImagePath, 0.5); + (void)model.generateFromString(kValidTestImagePath, 0.5, 0.5, {}, + "forward"); } }; } // namespace model_tests @@ -67,57 +68,65 @@ INSTANTIATE_TYPED_TEST_SUITE_P(ObjectDetection, VisionModelTest, TEST(ObjectDetectionGenerateTests, InvalidImagePathThrows) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg", 0.5), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg", 0.5, 0.5, + {}, "forward"), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, EmptyImagePathThrows) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - EXPECT_THROW((void)model.generateFromString("", 0.5), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString("", 0.5, 0.5, {}, "forward"), + RnExecutorchError); } TEST(ObjectDetectionGenerateTests, MalformedURIThrows) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad", 0.5), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad", 0.5, 0.5, + {}, "forward"), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, NegativeThresholdThrows) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - 
EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, -0.1), + EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, -0.1, 0.5, + {}, "forward"), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, ThresholdAboveOneThrows) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, 1.1), + EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, 1.1, 0.5, {}, + "forward"), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, ValidImageReturnsResults) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - auto results = model.generateFromString(kValidTestImagePath, 0.3); + auto results = + model.generateFromString(kValidTestImagePath, 0.3, 0.5, {}, "forward"); EXPECT_GE(results.size(), 0u); } TEST(ObjectDetectionGenerateTests, HighThresholdReturnsFewerResults) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - auto lowThresholdResults = model.generateFromString(kValidTestImagePath, 0.1); + auto lowThresholdResults = + model.generateFromString(kValidTestImagePath, 0.1, 0.5, {}, "forward"); auto highThresholdResults = - model.generateFromString(kValidTestImagePath, 0.9); + model.generateFromString(kValidTestImagePath, 0.9, 0.5, {}, "forward"); EXPECT_GE(lowThresholdResults.size(), highThresholdResults.size()); } TEST(ObjectDetectionGenerateTests, DetectionsHaveValidBoundingBoxes) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - auto results = model.generateFromString(kValidTestImagePath, 0.3); + auto results = + model.generateFromString(kValidTestImagePath, 0.3, 0.5, {}, "forward"); for (const auto &detection : results) { EXPECT_LE(detection.bbox.x1, detection.bbox.x2); @@ -130,7 +139,8 @@ TEST(ObjectDetectionGenerateTests, DetectionsHaveValidBoundingBoxes) { TEST(ObjectDetectionGenerateTests, DetectionsHaveValidScores) { 
ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - auto results = model.generateFromString(kValidTestImagePath, 0.3); + auto results = + model.generateFromString(kValidTestImagePath, 0.3, 0.5, {}, "forward"); for (const auto &detection : results) { EXPECT_GE(detection.score, 0.0f); @@ -141,7 +151,8 @@ TEST(ObjectDetectionGenerateTests, DetectionsHaveValidScores) { TEST(ObjectDetectionGenerateTests, DetectionsHaveValidLabels) { ObjectDetection model(kValidObjectDetectionModelPath, {}, {}, kCocoLabels, nullptr); - auto results = model.generateFromString(kValidTestImagePath, 0.3); + auto results = + model.generateFromString(kValidTestImagePath, 0.3, 0.5, {}, "forward"); for (const auto &detection : results) { const auto &label = detection.label; @@ -162,7 +173,7 @@ TEST(ObjectDetectionPixelTests, ValidPixelDataReturnsResults) { JSTensorViewIn tensorView{pixelData.data(), {height, width, channels}, executorch::aten::ScalarType::Byte}; - auto results = model.generateFromPixels(tensorView, 0.3); + auto results = model.generateFromPixels(tensorView, 0.3, 0.5, {}, "forward"); EXPECT_GE(results.size(), 0u); } @@ -174,8 +185,9 @@ TEST(ObjectDetectionPixelTests, NegativeThresholdThrows) { JSTensorViewIn tensorView{pixelData.data(), {height, width, channels}, executorch::aten::ScalarType::Byte}; - EXPECT_THROW((void)model.generateFromPixels(tensorView, -0.1), - RnExecutorchError); + EXPECT_THROW( + (void)model.generateFromPixels(tensorView, -0.1, 0.5, {}, "forward"), + RnExecutorchError); } TEST(ObjectDetectionPixelTests, ThresholdAboveOneThrows) { @@ -186,8 +198,9 @@ TEST(ObjectDetectionPixelTests, ThresholdAboveOneThrows) { JSTensorViewIn tensorView{pixelData.data(), {height, width, channels}, executorch::aten::ScalarType::Byte}; - EXPECT_THROW((void)model.generateFromPixels(tensorView, 1.1), - RnExecutorchError); + EXPECT_THROW( + (void)model.generateFromPixels(tensorView, 1.1, 0.5, {}, "forward"), + RnExecutorchError); } 
TEST(ObjectDetectionInheritedTests, GetInputShapeWorks) { @@ -239,5 +252,6 @@ TEST(ObjectDetectionNormTests, ValidNormParamsGenerateSucceeds) { const std::vector std = {0.229f, 0.224f, 0.225f}; ObjectDetection model(kValidObjectDetectionModelPath, mean, std, kCocoLabels, nullptr); - EXPECT_NO_THROW((void)model.generateFromString(kValidTestImagePath, 0.5)); + EXPECT_NO_THROW((void)model.generateFromString(kValidTestImagePath, 0.5, 0.5, + {}, "forward")); } diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/unit/CVProcessingTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/unit/CVProcessingTest.cpp new file mode 100644 index 0000000000..246cdafc2f --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/tests/unit/CVProcessingTest.cpp @@ -0,0 +1,244 @@ +#include +#include +#include +#include + +using namespace rnexecutorch::cv_processing; + +class CVProcessingTest : public ::testing::Test {}; + +// ============================================================================ +// prepareAllowedClasses Tests +// ============================================================================ + +TEST_F(CVProcessingTest, PrepareAllowedClasses_EmptyVector_ReturnsEmptySet) { + std::vector input = {}; + auto result = prepareAllowedClasses(input); + EXPECT_TRUE(result.empty()); +} + +TEST_F(CVProcessingTest, PrepareAllowedClasses_SingleClass_ReturnsSetWithOne) { + std::vector input = {5}; + auto result = prepareAllowedClasses(input); + EXPECT_EQ(result.size(), 1); + EXPECT_TRUE(result.count(5) > 0); +} + +TEST_F(CVProcessingTest, + PrepareAllowedClasses_MultipleClasses_ReturnsCorrectSet) { + std::vector input = {1, 3, 5, 7}; + auto result = prepareAllowedClasses(input); + EXPECT_EQ(result.size(), 4); + EXPECT_TRUE(result.count(1) > 0); + EXPECT_TRUE(result.count(3) > 0); + EXPECT_TRUE(result.count(5) > 0); + EXPECT_TRUE(result.count(7) > 0); +} + +TEST_F(CVProcessingTest, + 
PrepareAllowedClasses_DuplicateClasses_RemovesDuplicates) { + std::vector input = {1, 3, 3, 5, 1}; + auto result = prepareAllowedClasses(input); + EXPECT_EQ(result.size(), 3); // Should have 1, 3, 5 + EXPECT_TRUE(result.count(1) > 0); + EXPECT_TRUE(result.count(3) > 0); + EXPECT_TRUE(result.count(5) > 0); +} + +// ============================================================================ +// validateThresholds Tests +// ============================================================================ + +TEST_F(CVProcessingTest, ValidateThresholds_ValidValues_DoesNotThrow) { + EXPECT_NO_THROW(validateThresholds(0.5, 0.5)); + EXPECT_NO_THROW(validateThresholds(0.0, 0.0)); + EXPECT_NO_THROW(validateThresholds(1.0, 1.0)); +} + +TEST_F(CVProcessingTest, ValidateThresholds_NegativeConfidence_Throws) { + EXPECT_THROW(validateThresholds(-0.1, 0.5), rnexecutorch::RnExecutorchError); +} + +TEST_F(CVProcessingTest, ValidateThresholds_ConfidenceAboveOne_Throws) { + EXPECT_THROW(validateThresholds(1.1, 0.5), rnexecutorch::RnExecutorchError); +} + +TEST_F(CVProcessingTest, ValidateThresholds_NegativeIoU_Throws) { + EXPECT_THROW(validateThresholds(0.5, -0.1), rnexecutorch::RnExecutorchError); +} + +TEST_F(CVProcessingTest, ValidateThresholds_IoUAboveOne_Throws) { + EXPECT_THROW(validateThresholds(0.5, 1.1), rnexecutorch::RnExecutorchError); +} + +// ============================================================================ +// computeIoU Tests +// ============================================================================ + +TEST_F(CVProcessingTest, ComputeIoU_IdenticalBoxes_ReturnsOne) { + BBox box{0.0f, 0.0f, 10.0f, 10.0f}; + float iou = computeIoU(box, box); + EXPECT_FLOAT_EQ(iou, 1.0f); +} + +TEST_F(CVProcessingTest, ComputeIoU_NoOverlap_ReturnsZero) { + BBox box1{0.0f, 0.0f, 10.0f, 10.0f}; + BBox box2{20.0f, 20.0f, 30.0f, 30.0f}; + float iou = computeIoU(box1, box2); + EXPECT_FLOAT_EQ(iou, 0.0f); +} + +TEST_F(CVProcessingTest, ComputeIoU_PartialOverlap_ReturnsCorrectValue) { 
+ BBox box1{0.0f, 0.0f, 10.0f, 10.0f}; // Area = 100 + BBox box2{5.0f, 5.0f, 15.0f, 15.0f}; // Area = 100 + // Intersection: (5,5) to (10,10) = 25 + // Union: 100 + 100 - 25 = 175 + // IoU = 25/175 ≈ 0.142857 + float iou = computeIoU(box1, box2); + EXPECT_NEAR(iou, 0.142857f, 0.0001f); +} + +TEST_F(CVProcessingTest, ComputeIoU_OneBoxInsideAnother_ReturnsCorrectValue) { + BBox box1{0.0f, 0.0f, 10.0f, 10.0f}; // Area = 100 + BBox box2{2.0f, 2.0f, 8.0f, 8.0f}; // Area = 36 + // Intersection: 36 (box2 is fully inside) + // Union: 100 + 36 - 36 = 100 + // IoU = 36/100 = 0.36 + float iou = computeIoU(box1, box2); + EXPECT_FLOAT_EQ(iou, 0.36f); +} + +// ============================================================================ +// BBox Tests +// ============================================================================ + +TEST_F(CVProcessingTest, BBox_Width_ReturnsCorrectValue) { + BBox box{0.0f, 0.0f, 10.0f, 5.0f}; + EXPECT_FLOAT_EQ(box.width(), 10.0f); +} + +TEST_F(CVProcessingTest, BBox_Height_ReturnsCorrectValue) { + BBox box{0.0f, 0.0f, 10.0f, 5.0f}; + EXPECT_FLOAT_EQ(box.height(), 5.0f); +} + +TEST_F(CVProcessingTest, BBox_Area_ReturnsCorrectValue) { + BBox box{0.0f, 0.0f, 10.0f, 5.0f}; + EXPECT_FLOAT_EQ(box.area(), 50.0f); +} + +TEST_F(CVProcessingTest, BBox_IsValid_ValidBox_ReturnsTrue) { + BBox box{0.0f, 0.0f, 10.0f, 5.0f}; + EXPECT_TRUE(box.isValid()); +} + +TEST_F(CVProcessingTest, BBox_IsValid_InvalidBox_ReturnsFalse) { + BBox box1{10.0f, 0.0f, 5.0f, 5.0f}; // x2 < x1 + EXPECT_FALSE(box1.isValid()); + + BBox box2{0.0f, 10.0f, 5.0f, 5.0f}; // y2 < y1 + EXPECT_FALSE(box2.isValid()); + + BBox box3{-1.0f, 0.0f, 5.0f, 5.0f}; // negative x1 + EXPECT_FALSE(box3.isValid()); +} + +TEST_F(CVProcessingTest, BBox_Scale_ReturnsCorrectlyScaledBox) { + BBox box{1.0f, 2.0f, 3.0f, 4.0f}; + BBox scaled = box.scale(2.0f, 3.0f); + EXPECT_FLOAT_EQ(scaled.x1, 2.0f); + EXPECT_FLOAT_EQ(scaled.y1, 6.0f); + EXPECT_FLOAT_EQ(scaled.x2, 6.0f); + EXPECT_FLOAT_EQ(scaled.y2, 12.0f); +} + 
+// ============================================================================ +// ScaleRatios Tests +// ============================================================================ + +TEST_F(CVProcessingTest, ScaleRatios_Compute_ReturnsCorrectRatios) { + cv::Size original(640, 480); + cv::Size model(320, 240); + auto ratios = ScaleRatios::compute(original, model); + EXPECT_FLOAT_EQ(ratios.widthRatio, 2.0f); + EXPECT_FLOAT_EQ(ratios.heightRatio, 2.0f); +} + +// ============================================================================ +// validateNormParam Tests +// ============================================================================ + +TEST_F(CVProcessingTest, ValidateNormParam_ValidThreeElements_ReturnsScalar) { + std::vector values = {0.5f, 0.6f, 0.7f}; + auto result = validateNormParam(values, "test"); + ASSERT_TRUE(result.has_value()); + EXPECT_FLOAT_EQ((*result)[0], 0.5f); + EXPECT_FLOAT_EQ((*result)[1], 0.6f); + EXPECT_FLOAT_EQ((*result)[2], 0.7f); +} + +TEST_F(CVProcessingTest, ValidateNormParam_EmptyVector_ReturnsNullopt) { + std::vector values = {}; + auto result = validateNormParam(values, "test"); + EXPECT_FALSE(result.has_value()); +} + +TEST_F(CVProcessingTest, ValidateNormParam_WrongSize_ReturnsNullopt) { + std::vector values = {0.5f, 0.6f}; // Only 2 elements + auto result = validateNormParam(values, "test"); + EXPECT_FALSE(result.has_value()); +} + +// ============================================================================ +// nonMaxSuppression Tests +// ============================================================================ + +struct TestDetection { + BBox bbox; + float score; + int32_t classIndex; +}; + +TEST_F(CVProcessingTest, NonMaxSuppression_EmptyVector_ReturnsEmpty) { + std::vector detections = {}; + auto result = nonMaxSuppression(detections, 0.5); + EXPECT_TRUE(result.empty()); +} + +TEST_F(CVProcessingTest, + NonMaxSuppression_SingleDetection_ReturnsSingleDetection) { + std::vector detections = { + {{0.0f, 0.0f, 
10.0f, 10.0f}, 0.9f, 1}}; + auto result = nonMaxSuppression(detections, 0.5); + EXPECT_EQ(result.size(), 1); + EXPECT_FLOAT_EQ(result[0].score, 0.9f); +} + +TEST_F(CVProcessingTest, + NonMaxSuppression_OverlappingBoxes_SuppressesLowerScore) { + std::vector detections = { + {{0.0f, 0.0f, 10.0f, 10.0f}, 0.9f, 1}, // High score + {{0.0f, 0.0f, 10.0f, 10.0f}, 0.5f, 1}, // Same box, low score + }; + auto result = nonMaxSuppression(detections, 0.5); + EXPECT_EQ(result.size(), 1); + EXPECT_FLOAT_EQ(result[0].score, 0.9f); +} + +TEST_F(CVProcessingTest, NonMaxSuppression_DifferentClasses_KeepsBothBoxes) { + std::vector detections = { + {{0.0f, 0.0f, 10.0f, 10.0f}, 0.9f, 1}, // Class 1 + {{0.0f, 0.0f, 10.0f, 10.0f}, 0.8f, 2}, // Class 2, same location + }; + auto result = nonMaxSuppression(detections, 0.5); + EXPECT_EQ(result.size(), 2); // Both should be kept (different classes) +} + +TEST_F(CVProcessingTest, NonMaxSuppression_NoOverlap_KeepsAllBoxes) { + std::vector detections = { + {{0.0f, 0.0f, 10.0f, 10.0f}, 0.9f, 1}, + {{20.0f, 20.0f, 30.0f, 30.0f}, 0.8f, 1}, + }; + auto result = nonMaxSuppression(detections, 0.5); + EXPECT_EQ(result.size(), 2); // Both should be kept (no overlap) +} diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.cpp deleted file mode 100644 index 108fd6ff8a..0000000000 --- a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.cpp +++ /dev/null @@ -1,21 +0,0 @@ -#include "Processing.h" -#include -#include - -namespace rnexecutorch::utils::computer_vision { - -float computeIoU(const BBox &a, const BBox &b) { - float x1 = std::max(a.x1, b.x1); - float y1 = std::max(a.y1, b.y1); - float x2 = std::min(a.x2, b.x2); - float y2 = std::min(a.y2, b.y2); - - float intersectionArea = std::max(0.0f, x2 - x1) * std::max(0.0f, y2 - y1); - float areaA = a.area(); - float areaB = b.area(); - 
float unionArea = areaA + areaB - intersectionArea; - - return (unionArea > 0.0f) ? (intersectionArea / unionArea) : 0.0f; -} - -} // namespace rnexecutorch::utils::computer_vision diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h deleted file mode 100644 index 3bd3022d4a..0000000000 --- a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Processing.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include "Types.h" -#include -#include - -namespace rnexecutorch::utils::computer_vision { - -float computeIoU(const BBox &a, const BBox &b); - -template -std::vector nonMaxSuppression(std::vector items, double iouThreshold) { - if (items.empty()) { - return {}; - } - - std::ranges::sort(items, - [](const T &a, const T &b) { return a.score > b.score; }); - - std::vector result; - std::vector suppressed(items.size(), false); - - for (size_t i = 0; i < items.size(); ++i) { - if (suppressed[i]) { - continue; - } - - result.push_back(items[i]); - - for (size_t j = i + 1; j < items.size(); ++j) { - if (suppressed[j]) { - continue; - } - - if constexpr (requires(T t) { t.classIndex; }) { - if (items[i].classIndex != items[j].classIndex) { - continue; - } - } - - float iou = computeIoU(items[i].bbox, items[j].bbox); - if (iou > iouThreshold) { - suppressed[j] = true; - } - } - } - - return result; -} - -} // namespace rnexecutorch::utils::computer_vision diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Types.h b/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Types.h deleted file mode 100644 index 8899d3b87c..0000000000 --- a/packages/react-native-executorch/common/rnexecutorch/utils/computer_vision/Types.h +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once - -#include - -namespace rnexecutorch::utils::computer_vision { - -struct BBox { - - float width() 
const { return x2 - x1; } - - float height() const { return y2 - y1; } - - float area() const { return width() * height(); } - - bool isValid() const { - return x2 > x1 && y2 > y1 && x1 >= 0.0f && y1 >= 0.0f; - } - - BBox scale(float widthRatio, float heightRatio) const { - return {x1 * widthRatio, y1 * heightRatio, x2 * widthRatio, - y2 * heightRatio}; - } - - float x1, y1, x2, y2; -}; - -template -concept HasBBoxAndScore = requires(T t) { - { t.bbox } -> std::convertible_to; - { t.score } -> std::convertible_to; -}; - -} // namespace rnexecutorch::utils::computer_vision