Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions libraries/dl-streamer/docs/source/elements/gvagenai.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,16 @@ Pad Templates:
width: [ 1, 2147483647 ]
height: [ 1, 2147483647 ]
framerate: [ 0/1, 2147483647/1 ]
video/x-raw(memory:DMABuf)
format: { (string)DMA_DRM }
width: [ 1, 2147483647 ]
height: [ 1, 2147483647 ]
framerate: [ 0/1, 2147483647/1 ]
video/x-raw(memory:VAMemory)
format: { (string)NV12 }
width: [ 1, 2147483647 ]
height: [ 1, 2147483647 ]
framerate: [ 0/1, 2147483647/1 ]

SRC template: 'src'
Availability: Always
Expand All @@ -107,6 +117,16 @@ Pad Templates:
width: [ 1, 2147483647 ]
height: [ 1, 2147483647 ]
framerate: [ 0/1, 2147483647/1 ]
video/x-raw(memory:DMABuf)
format: { (string)DMA_DRM }
width: [ 1, 2147483647 ]
height: [ 1, 2147483647 ]
framerate: [ 0/1, 2147483647/1 ]
video/x-raw(memory:VAMemory)
format: { (string)NV12 }
width: [ 1, 2147483647 ]
height: [ 1, 2147483647 ]
framerate: [ 0/1, 2147483647/1 ]

Element has no clocking capabilities.
Element has no URI handling capabilities.
Expand Down Expand Up @@ -148,6 +168,9 @@ Element Properties:
prompt : Text prompt for the GenAI model
flags: readable, writable
String. Default: null
prompt-path : Path to text prompt file for the GenAI model
flags: readable, writable
String. Default: null
qos : Handle Quality-of-Service events
flags: readable, writable
Boolean. Default: false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ target_link_libraries(${TARGET_NAME}
${GSTREAMER_VIDEO_LIBRARIES}
openvino::genai
${OpenCV_LIBS}
common
dlstreamer_gst_meta
json-hpp
)
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ namespace genai {
OpenVINOGenAIContext::OpenVINOGenAIContext(const std::string &model_path, const std::string &device,
const std::string &cache_path, const std::string &generation_config_str,
const std::string &scheduler_config_str) {
// Initialize memory mapper for GStreamer buffers
mapper = std::make_shared<dlstreamer::MemoryMapperGSTToCPU>(nullptr, nullptr);

// Set configurations if provided
if (!generation_config_str.empty()) {
generation_config = ConfigParser::parse_generation_config_string(generation_config_str);
Expand Down Expand Up @@ -60,58 +63,99 @@ OpenVINOGenAIContext::~OpenVINOGenAIContext() {

bool OpenVINOGenAIContext::add_tensor_to_vector(GstBuffer *buffer, GstVideoInfo *info) {
try {
GstMapInfo map;
if (!gst_buffer_map(buffer, &map, GST_MAP_READ)) {
GST_ERROR("Failed to map buffer");
return false;
// Create a GSTFrame and map to CPU memory
auto gst_frame = std::make_shared<dlstreamer::GSTFrame>(buffer, info);
auto mapped_frame = mapper->map(gst_frame, dlstreamer::AccessMode::Read);

// Convert to Mat, code from gvawatermark
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Conversion of VAMemory / DMABuf to an OpenCV Mat will involve a memory copy.
Please add support for remote tensors instead.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RemoteTensor is not supported. See CVS-160602

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is GPU memory not supported at all, or is the limitation the lack of ov::preprocessor in the GenAI front-end?
We should be able to generate RGB or BGR images in VAMemory and create tensor wrappers around them.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both. VAAPI surfaces don't support 24-bit RGB or BGR; they only support 32-bit RGBA or BGRA.
For GenAI, the image encoding (preprocessing) handles the data on the CPU. See classes.cpp: the llava_image_embed_make_with_bytes_slice and resample functions.
Also, the VLMPipeline class doesn't expose a RemoteContext, and a RemoteContext cannot be passed into VLMPipeline, so there is no way to create a RemoteTensor.

static constexpr std::array<int, 4> channels_to_cvtype_map = {CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4};
std::vector<cv::Mat> image_planes;
image_planes.reserve(mapped_frame->num_tensors());

// Go through planes and create cv::Mat for every plane
for (auto &tensor : *mapped_frame) {
// Verify number of channels
dlstreamer::ImageInfo image_info(tensor->info());
assert(image_info.channels() > 0 && image_info.channels() <= channels_to_cvtype_map.size());
const int cv_type = channels_to_cvtype_map[image_info.channels() - 1];
image_planes.emplace_back(image_info.height(), image_info.width(), cv_type, tensor->data(),
image_info.width_stride());
}

// Convert GStreamer buffer to OpenCV Mat
auto check_planes = [&image_planes](size_t n) {
if (image_planes.size() != n)
throw std::runtime_error("Image format error, plane count != " + std::to_string(n));
};

// Convert Mat to RGB format
cv::Mat frame;
switch (GST_VIDEO_INFO_FORMAT(info)) {
case GST_VIDEO_FORMAT_RGB:
frame = cv::Mat(info->height, info->width, CV_8UC3, map.data);
check_planes(1);
frame = image_planes[0];
break;
case GST_VIDEO_FORMAT_RGBA:
case GST_VIDEO_FORMAT_RGBx:
frame = cv::Mat(info->height, info->width, CV_8UC4, map.data);
cv::cvtColor(frame, frame, cv::COLOR_RGBA2RGB);
check_planes(1);
cv::cvtColor(image_planes[0], frame, cv::COLOR_RGBA2RGB);
break;
case GST_VIDEO_FORMAT_BGR:
frame = cv::Mat(info->height, info->width, CV_8UC3, map.data);
cv::cvtColor(frame, frame, cv::COLOR_BGR2RGB);
check_planes(1);
cv::cvtColor(image_planes[0], frame, cv::COLOR_BGR2RGB);
break;
case GST_VIDEO_FORMAT_BGRA:
case GST_VIDEO_FORMAT_BGRx:
frame = cv::Mat(info->height, info->width, CV_8UC4, map.data);
cv::cvtColor(frame, frame, cv::COLOR_BGRA2RGB);
check_planes(1);
cv::cvtColor(image_planes[0], frame, cv::COLOR_BGRA2RGB);
break;
case GST_VIDEO_FORMAT_NV12: {
frame = cv::Mat(info->height * 3 / 2, info->width, CV_8UC1, map.data);
cv::cvtColor(frame, frame, cv::COLOR_YUV2RGB_NV12);
check_planes(2);
cv::cvtColorTwoPlane(image_planes[0], image_planes[1], frame, cv::COLOR_YUV2RGB_NV12);
break;
}
case GST_VIDEO_FORMAT_I420: {
frame = cv::Mat(info->height * 3 / 2, info->width, CV_8UC1, map.data);
cv::cvtColor(frame, frame, cv::COLOR_YUV2RGB_I420);
check_planes(3);
// For I420, need to create a single Mat with the layout Y+U+V
uint8_t *y_data = image_planes[0].data;
uint8_t *u_data = image_planes[1].data;
uint8_t *v_data = image_planes[2].data;
int y_size = image_planes[0].rows * image_planes[0].step;
int u_size = image_planes[1].rows * image_planes[1].step;
int v_size = image_planes[2].rows * image_planes[2].step;

// Check if planes are contiguous
if (u_data == y_data + y_size && v_data == u_data + u_size) {
// Planes are contiguous (typical)
cv::Mat yuv(info->height * 3 / 2, info->width, CV_8UC1, y_data);
cv::cvtColor(yuv, frame, cv::COLOR_YUV2RGB_I420);
} else {
// Planes are not contiguous, need to copy (fallback)
cv::Mat yuv(info->height * 3 / 2, info->width, CV_8UC1);
image_planes[0].copyTo(yuv.rowRange(0, info->height));
image_planes[1].copyTo(yuv.rowRange(info->height, info->height + info->height / 4));
image_planes[2].copyTo(yuv.rowRange(info->height + info->height / 4, info->height * 3 / 2));
cv::cvtColor(yuv, frame, cv::COLOR_YUV2RGB_I420);
}
break;
}
default:
gst_buffer_unmap(buffer, &map);
GST_ERROR("Unsupported video format");
return false;
}

// Create tensor
auto tensor = ov::Tensor(ov::element::u8, {1, static_cast<unsigned long long>(frame.rows),
static_cast<unsigned long long>(frame.cols),
static_cast<unsigned long long>(frame.channels())});
memcpy(tensor.data(), frame.data, frame.total() * frame.elemSize());
auto tensor = ov::Tensor(ov::element::u8, {1, static_cast<size_t>(frame.rows), static_cast<size_t>(frame.cols),
static_cast<size_t>(frame.channels())});
size_t expected_size = frame.total() * frame.elemSize();
if (tensor.get_byte_size() != expected_size) {
GST_ERROR("Tensor size mismatch: expected %zu, got %zu", expected_size, tensor.get_byte_size());
return false;
}
memcpy(tensor.data(), frame.data, expected_size);

// Add tensor to vector
tensor_vector.push_back(tensor);

gst_buffer_unmap(buffer, &map);
return true;
} catch (const std::exception &e) {
GST_ERROR("Error converting frame to tensor: %s", e.what());
Expand Down Expand Up @@ -198,6 +242,8 @@ std::string OpenVINOGenAIContext::create_json_metadata(GstClockTime timestamp, b
if (include_metrics) {
nlohmann::json metrics_obj = {
{"load_time", round_2dp(metrics.get_load_time())},
{"num_generated_tokens", metrics.get_num_generated_tokens()},
{"num_input_tokens", metrics.get_num_input_tokens()},
{"generate_time_mean", round_2dp(metrics.get_generate_duration().mean)},
{"generate_time_std", round_2dp(metrics.get_generate_duration().std)},
{"tokenization_time_mean", round_2dp(metrics.get_tokenization_duration().mean)},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#include <gst/gst.h>
#include <gst/video/video.h>

#include "dlstreamer/gst/mappers/gst_to_cpu.h"

#include <opencv2/opencv.hpp>
#include <openvino/genai/visual_language/pipeline.hpp>

Expand Down Expand Up @@ -98,6 +100,7 @@ class OpenVINOGenAIContext {
std::string create_json_metadata(GstClockTime timestamp = GST_CLOCK_TIME_NONE, bool include_metrics = false);

private:
std::shared_ptr<dlstreamer::MemoryMapperGSTToCPU> mapper = nullptr;
std::unique_ptr<ov::genai::VLMPipeline> pipeline = nullptr;
ov::AnyMap generation_config = {};
std::optional<ov::genai::SchedulerConfig> scheduler_config = std::nullopt;
Expand Down
Loading
Loading