diff --git a/.github/workflows/ci-freebsd.yml b/.github/workflows/ci-freebsd.yml index d510bf30397..8b67f3b01d1 100644 --- a/.github/workflows/ci-freebsd.yml +++ b/.github/workflows/ci-freebsd.yml @@ -113,6 +113,8 @@ jobs: devel/pkgconf \ ftp/curl \ graphics/libdrm \ + graphics/vulkan-headers \ + graphics/vulkan-loader \ graphics/wayland \ lang/python312 \ multimedia/libva \ diff --git a/cmake/compile_definitions/linux.cmake b/cmake/compile_definitions/linux.cmake index 3758884fdce..d161499d6fc 100644 --- a/cmake/compile_definitions/linux.cmake +++ b/cmake/compile_definitions/linux.cmake @@ -120,6 +120,21 @@ if(LIBVA_FOUND) "${CMAKE_SOURCE_DIR}/src/platform/linux/vaapi.cpp") endif() +# vulkan video encoding (via FFmpeg) +if(${SUNSHINE_ENABLE_VULKAN}) + find_package(Vulkan REQUIRED) +else() + set(Vulkan_FOUND OFF) +endif() +if(Vulkan_FOUND) + list(APPEND SUNSHINE_DEFINITIONS SUNSHINE_BUILD_VULKAN=1) + include_directories(SYSTEM ${Vulkan_INCLUDE_DIRS}) + list(APPEND PLATFORM_LIBRARIES ${Vulkan_LIBRARIES}) + list(APPEND PLATFORM_TARGET_FILES + "${CMAKE_SOURCE_DIR}/src/platform/linux/vulkan_encode.h" + "${CMAKE_SOURCE_DIR}/src/platform/linux/vulkan_encode.cpp") +endif() + # wayland if(${SUNSHINE_ENABLE_WAYLAND}) find_package(Wayland REQUIRED) diff --git a/cmake/prep/options.cmake b/cmake/prep/options.cmake index 6b732a957e6..9be4629245a 100644 --- a/cmake/prep/options.cmake +++ b/cmake/prep/options.cmake @@ -60,6 +60,8 @@ elseif(UNIX) # Linux "Enable KMS grab if available." ON) option(SUNSHINE_ENABLE_VAAPI "Enable building vaapi specific code." ON) + option(SUNSHINE_ENABLE_VULKAN + "Enable Vulkan video encoding." ON) option(SUNSHINE_ENABLE_WAYLAND "Enable building wayland specific code." ON) option(SUNSHINE_ENABLE_X11 diff --git a/docs/configuration.md b/docs/configuration.md index 662a07f1994..8907269da84 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -2126,6 +2126,11 @@ editing the `conf` file in a text editor. Use the examples as reference. 
vaapi Use VA-API (AMD, Intel) + + vulkan + Use Vulkan encoder (AMD, Intel, NVIDIA). + @note{Applies to Linux only.} + software Encoding occurs on the CPU diff --git a/packaging/linux/Arch/PKGBUILD b/packaging/linux/Arch/PKGBUILD index 298f8964137..15d4acce6a2 100644 --- a/packaging/linux/Arch/PKGBUILD +++ b/packaging/linux/Arch/PKGBUILD @@ -50,6 +50,7 @@ depends=( 'openssl' 'opus' 'udev' + 'vulkan-icd-loader' 'which' ) @@ -63,6 +64,7 @@ makedepends=( 'make' 'nodejs' 'npm' + 'vulkan-headers' ) checkdepends=( diff --git a/packaging/linux/copr/Sunshine.spec b/packaging/linux/copr/Sunshine.spec index 572f518350c..4815a91bff4 100644 --- a/packaging/linux/copr/Sunshine.spec +++ b/packaging/linux/copr/Sunshine.spec @@ -44,6 +44,8 @@ BuildRequires: openssl-devel BuildRequires: pipewire-devel BuildRequires: rpm-build BuildRequires: systemd-rpm-macros +BuildRequires: vulkan-headers +BuildRequires: vulkan-loader-devel BuildRequires: wget BuildRequires: which @@ -146,6 +148,7 @@ Requires: libX11 >= 1.7.3.1 Requires: numactl-libs >= 2.0.14 Requires: openssl >= 3.0.2 Requires: pulseaudio-libs >= 10.0 +Requires: vulkan-loader %endif %if 0%{?suse_version} @@ -162,6 +165,7 @@ Requires: libX11-6 Requires: libnuma1 Requires: libopenssl3 Requires: libpulse0 +Requires: vulkan-loader %endif %description diff --git a/packaging/sunshine.rb b/packaging/sunshine.rb index 860c890fe05..772c8e374d6 100644 --- a/packaging/sunshine.rb +++ b/packaging/sunshine.rb @@ -94,6 +94,8 @@ class Sunshine < Formula depends_on "pipewire" depends_on "pulseaudio" depends_on "systemd" + depends_on "vulkan-headers" + depends_on "vulkan-loader" depends_on "wayland" end diff --git a/scripts/linux_build.sh b/scripts/linux_build.sh index 2b1d705ee7b..9a79b43dbe0 100755 --- a/scripts/linux_build.sh +++ b/scripts/linux_build.sh @@ -193,6 +193,8 @@ function add_arch_deps() { 'openssl' 'opus' 'udev' + 'vulkan-headers' + 'vulkan-icd-loader' 'wayland' ) @@ -247,6 +249,7 @@ function add_debian_based_deps() { 
"libxfixes-dev" # X11 "libxrandr-dev" # X11 "libxtst-dev" # X11 + "libvulkan-dev" # Vulkan "ninja-build" "npm" # web-ui "systemd" @@ -326,6 +329,8 @@ function add_fedora_deps() { "pipewire-devel" "pulseaudio-libs-devel" "rpm-build" # if you want to build an RPM binary package + "vulkan-headers" + "vulkan-loader-devel" "wget" # necessary for cuda install with `run` file "which" # necessary for cuda install with `run` file "xorg-x11-server-Xvfb" # necessary for headless unit testing diff --git a/src/config.cpp b/src/config.cpp index c320ed6dccf..7631eba3ce0 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -486,6 +486,11 @@ namespace config { false, // strict_rc_buffer }, // vaapi + { + 2, // vk.tune (default: ll - low latency) + 4, // vk.rc_mode (default: vbr) + }, + {}, // capture {}, // encoder {}, // adapter_name @@ -1116,6 +1121,9 @@ namespace config { bool_f(vars, "vaapi_strict_rc_buffer", video.vaapi.strict_rc_buffer); + int_f(vars, "vk_tune", video.vk.tune); + int_f(vars, "vk_rc_mode", video.vk.rc_mode); + string_f(vars, "capture", video.capture); string_f(vars, "encoder", video.encoder); string_f(vars, "adapter_name", video.adapter_name); diff --git a/src/config.h b/src/config.h index e8d1594fba2..aad68d438ab 100644 --- a/src/config.h +++ b/src/config.h @@ -80,6 +80,11 @@ namespace config { bool strict_rc_buffer; } vaapi; + struct { + int tune; // 0=default, 1=hq, 2=ll, 3=ull, 4=lossless + int rc_mode; // 0=driver, 1=cqp, 2=cbr, 4=vbr + } vk; + std::string capture; std::string encoder; std::string adapter_name; diff --git a/src/platform/common.h b/src/platform/common.h index 274bbbdc8d1..f4d50826319 100644 --- a/src/platform/common.h +++ b/src/platform/common.h @@ -232,6 +232,7 @@ namespace platf { dxgi, ///< DXGI cuda, ///< CUDA videotoolbox, ///< VideoToolbox + vulkan, ///< Vulkan unknown ///< Unknown }; diff --git a/src/platform/linux/kmsgrab.cpp b/src/platform/linux/kmsgrab.cpp index 7859bcd3bb1..4b4614156c4 100644 --- a/src/platform/linux/kmsgrab.cpp 
+++ b/src/platform/linux/kmsgrab.cpp @@ -27,6 +27,7 @@ #include "src/utility.h" #include "src/video.h" #include "vaapi.h" +#include "vulkan_encode.h" #include "wayland.h" using namespace std::literals; @@ -1238,6 +1239,12 @@ namespace platf { } #endif +#ifdef SUNSHINE_BUILD_VULKAN + if (mem_type == mem_type_e::vulkan) { + return vk::make_avcodec_encode_device_ram(width, height); + } +#endif + #ifdef SUNSHINE_BUILD_CUDA if (mem_type == mem_type_e::cuda) { return cuda::make_avcodec_encode_device(width, height, false); @@ -1370,6 +1377,12 @@ namespace platf { } #endif +#ifdef SUNSHINE_BUILD_VULKAN + if (mem_type == mem_type_e::vulkan) { + return vk::make_avcodec_encode_device_vram(width, height, img_offset_x, img_offset_y); + } +#endif + #ifdef SUNSHINE_BUILD_CUDA if (mem_type == mem_type_e::cuda) { return cuda::make_avcodec_gl_encode_device(width, height, img_offset_x, img_offset_y); @@ -1515,7 +1528,7 @@ namespace platf { } // namespace kms std::shared_ptr kms_display(mem_type_e hwdevice_type, const std::string &display_name, const ::video::config_t &config) { - if (hwdevice_type == mem_type_e::vaapi || hwdevice_type == mem_type_e::cuda) { + if (hwdevice_type == mem_type_e::vaapi || hwdevice_type == mem_type_e::cuda || hwdevice_type == mem_type_e::vulkan) { auto disp = std::make_shared(hwdevice_type); if (!disp->init(display_name, config)) { diff --git a/src/platform/linux/portalgrab.cpp b/src/platform/linux/portalgrab.cpp index f5fa5065cc5..1e775ffdd47 100644 --- a/src/platform/linux/portalgrab.cpp +++ b/src/platform/linux/portalgrab.cpp @@ -29,6 +29,7 @@ #include "src/platform/common.h" #include "src/video.h" #include "vaapi.h" +#include "vulkan_encode.h" #include "wayland.h" namespace { @@ -805,6 +806,7 @@ namespace portal { // On hybrid GPU systems (Intel+NVIDIA), DMA-BUFs come from the Intel GPU and cannot // be imported into CUDA, so we fall back to memory buffers in that case. 
bool use_dmabuf = n_dmabuf_infos > 0 && (mem_type == platf::mem_type_e::vaapi || + mem_type == platf::mem_type_e::vulkan || (mem_type == platf::mem_type_e::cuda && display_is_nvidia)); if (use_dmabuf) { for (int i = 0; i < n_dmabuf_infos; i++) { @@ -1315,6 +1317,12 @@ namespace portal { } #endif +#ifdef SUNSHINE_BUILD_VULKAN + if (mem_type == platf::mem_type_e::vulkan) { + return vk::make_avcodec_encode_device_vram(width, height, 0, 0); + } +#endif + #ifdef SUNSHINE_BUILD_CUDA if (mem_type == platf::mem_type_e::cuda) { if (display_is_nvidia && n_dmabuf_infos > 0) { @@ -1456,7 +1464,7 @@ namespace portal { namespace platf { std::shared_ptr portal_display(mem_type_e hwdevice_type, const std::string &display_name, const video::config_t &config) { using enum platf::mem_type_e; - if (hwdevice_type != system && hwdevice_type != vaapi && hwdevice_type != cuda) { + if (hwdevice_type != system && hwdevice_type != vaapi && hwdevice_type != cuda && hwdevice_type != vulkan) { BOOST_LOG(error) << "Could not initialize display with the given hw device type."sv; return nullptr; } diff --git a/src/platform/linux/shaders/rgb2nv12.comp b/src/platform/linux/shaders/rgb2nv12.comp new file mode 100644 index 00000000000..b185d2649f1 --- /dev/null +++ b/src/platform/linux/shaders/rgb2nv12.comp @@ -0,0 +1,66 @@
+#version 450
+
+// RGB -> NV12 conversion compute shader.
+//
+// Every invocation handles one destination pixel: it samples the source image, optionally
+// blends the cursor on top, and stores one luma value into y_out. Invocations whose x and y
+// are both even additionally store one chroma pair into uv_out at half resolution.
+
+// 16x16 invocations per workgroup; main() discards invocations that fall outside dst_size.
+layout(local_size_x = 16, local_size_y = 16) in;
+
+// Source image, sampled with normalized coordinates.
+layout(set = 0, binding = 0) uniform sampler2D rgb_in;
+// Luma output, one texel per destination pixel.
+layout(set = 0, binding = 1, r8) uniform writeonly image2D y_out;
+// Chroma output (cb, cr), written at pos >> 1.
+layout(set = 0, binding = 2, rg8) uniform writeonly image2D uv_out;
+// Cursor image, only sampled when pc.cursor_size.x > 0.
+layout(set = 0, binding = 3) uniform sampler2D cursor_in;
+
+layout(push_constant) uniform PushConstants {
+    vec4 color_vec_y;   // xyz: RGB weights for Y, w: constant added after the dot product
+    vec4 color_vec_u;   // same layout, used for the first chroma channel (cb)
+    vec4 color_vec_v;   // same layout, used for the second chroma channel (cr)
+    vec2 range_y;       // x: scale, y: offset applied to Y before it is stored
+    vec2 range_uv;      // x: scale, y: offset applied to cb/cr before they are stored
+    ivec2 src_offset;   // origin of the sampled region inside rgb_in, in source texels
+    ivec2 src_size;     // size of the sampled region, in source texels
+    ivec2 dst_size;     // output size in pixels; invocations beyond it return early
+    ivec2 cursor_pos;   // top-left corner of the cursor, in destination pixels
+    ivec2 cursor_size;  // cursor extent in destination pixels; x == 0 means no cursor
+} pc;
+
+// Blends the cursor over `rgb` when destination pixel `pos` lies inside the cursor rectangle
+// [cursor_pos, cursor_pos + cursor_size); returns `rgb` unchanged otherwise.
+vec3 blend_cursor(vec3 rgb, ivec2 pos) {
+    ivec2 cp = pos - pc.cursor_pos;
+    if (cp.x >= 0 && cp.y >= 0 && cp.x < pc.cursor_size.x && cp.y < pc.cursor_size.y) {
+        // Sample at the texel center, normalized over the on-screen cursor size.
+        vec4 c = texture(cursor_in, (vec2(cp) + 0.5) / vec2(pc.cursor_size));
+        // Cursor channels are swizzled b,g,r before the alpha blend.
+        // NOTE(review): presumably the cursor image is uploaded as BGRA - confirm against the host code.
+        rgb = mix(rgb, c.bgr, c.a);
+    }
+    return rgb;
+}
+
+void main() {
+    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
+    // The dispatch is rounded up to whole workgroups; skip the padding invocations.
+    if (pos.x >= pc.dst_size.x || pos.y >= pc.dst_size.y)
+        return;
+
+    // inv_tex converts source texel units to normalized coordinates;
+    // scale is the number of source texels covered by one destination pixel.
+    vec2 inv_tex = 1.0 / vec2(textureSize(rgb_in, 0));
+    vec2 scale = vec2(pc.src_size) / vec2(pc.dst_size);
+
+    // Center of this destination pixel mapped into the source region.
+    vec2 uv = (vec2(pc.src_offset) + (vec2(pos) + 0.5) * scale) * inv_tex;
+    vec3 rgb = texture(rgb_in, uv).rgb;
+
+    if (pc.cursor_size.x > 0)
+        rgb = blend_cursor(rgb, pos);
+
+    // Y plane
+    float y = dot(pc.color_vec_y.xyz, rgb) + pc.color_vec_y.w;
+    imageStore(y_out, pos, vec4(y * pc.range_y.x + pc.range_y.y, 0, 0, 0));
+
+    // UV plane (half resolution): only the invocation at the even/even corner of each
+    // 2x2 block writes chroma.
+    if ((pos.x & 1) == 0 && (pos.y & 1) == 0) {
+        // Distance of one destination pixel, expressed in normalized source coordinates.
+        vec2 step = scale * inv_tex;
+
+        // Only the right-hand neighbour is sampled, so chroma is the average of two
+        // horizontally adjacent pixels; the second row of the 2x2 block is not read.
+        vec3 rgb_r = texture(rgb_in, uv + vec2(step.x, 0)).rgb;
+        if (pc.cursor_size.x > 0)
+            rgb_r = blend_cursor(rgb_r, pos + ivec2(1, 0));
+
+        vec3 avg = (rgb + rgb_r) * 0.5;
+
+        float cb = dot(pc.color_vec_u.xyz, avg) + pc.color_vec_u.w;
+        float cr = dot(pc.color_vec_v.xyz, avg) + pc.color_vec_v.w;
+
+        imageStore(uv_out, pos >> 1, vec4(cb * pc.range_uv.x + pc.range_uv.y,
+                                          cr * pc.range_uv.x + pc.range_uv.y, 0, 0));
+    }
+}
diff --git a/src/platform/linux/shaders/rgb2nv12.spv b/src/platform/linux/shaders/rgb2nv12.spv new file mode 100644 index 00000000000..70220131daf Binary files /dev/null and b/src/platform/linux/shaders/rgb2nv12.spv differ diff --git a/src/platform/linux/shaders/rgb2nv12.spv.h b/src/platform/linux/shaders/rgb2nv12.spv.h new file mode 100644 index 00000000000..94116853d59 --- /dev/null +++ b/src/platform/linux/shaders/rgb2nv12.spv.h @@ -0,0 +1,185 @@ +/** + * @file src/platform/linux/shaders/rgb2nv12.spv.h + * @brief Pre-compiled SPIR-V for RGB→NV12 compute shader. + * @note Regenerate: glslc -O rgb2nv12.comp -o rgb2nv12.spv, then convert with xxd or python.
+ */ +#pragma once +#include + +static const uint32_t rgb2nv12_comp_spv[] = { + 0x07230203, 0x00010000, 0x000d000b, 0x000001ab, 0x00000000, 0x00020011, 0x00000001, 0x00020011, + 0x00000031, 0x00020011, 0x00000032, 0x0006000b, 0x00000001, 0x4c534c47, 0x6474732e, 0x3035342e, + 0x00000000, 0x0003000e, 0x00000000, 0x00000001, 0x0006000f, 0x00000005, 0x00000004, 0x6e69616d, + 0x00000000, 0x00000061, 0x00060010, 0x00000004, 0x00000011, 0x00000010, 0x00000010, 0x00000001, + 0x00030047, 0x00000015, 0x00000002, 0x00050048, 0x00000015, 0x00000000, 0x00000023, 0x00000000, + 0x00050048, 0x00000015, 0x00000001, 0x00000023, 0x00000010, 0x00050048, 0x00000015, 0x00000002, + 0x00000023, 0x00000020, 0x00050048, 0x00000015, 0x00000003, 0x00000023, 0x00000030, 0x00050048, + 0x00000015, 0x00000004, 0x00000023, 0x00000038, 0x00050048, 0x00000015, 0x00000005, 0x00000023, + 0x00000040, 0x00050048, 0x00000015, 0x00000006, 0x00000023, 0x00000048, 0x00050048, 0x00000015, + 0x00000007, 0x00000023, 0x00000050, 0x00050048, 0x00000015, 0x00000008, 0x00000023, 0x00000058, + 0x00050048, 0x00000015, 0x00000009, 0x00000023, 0x00000060, 0x00040047, 0x00000045, 0x00000021, + 0x00000003, 0x00040047, 0x00000045, 0x00000022, 0x00000000, 0x00040047, 0x00000061, 0x0000000b, + 0x0000001c, 0x00040047, 0x0000007b, 0x00000021, 0x00000000, 0x00040047, 0x0000007b, 0x00000022, + 0x00000000, 0x00030047, 0x000000b5, 0x00000019, 0x00040047, 0x000000b5, 0x00000021, 0x00000001, + 0x00040047, 0x000000b5, 0x00000022, 0x00000000, 0x00030047, 0x00000102, 0x00000019, 0x00040047, + 0x00000102, 0x00000021, 0x00000002, 0x00040047, 0x00000102, 0x00000022, 0x00000000, 0x00040047, + 0x00000118, 0x0000000b, 0x00000019, 0x00020013, 0x00000002, 0x00030021, 0x00000003, 0x00000002, + 0x00030016, 0x00000006, 0x00000020, 0x00040017, 0x00000007, 0x00000006, 0x00000003, 0x00040015, + 0x00000009, 0x00000020, 0x00000001, 0x00040017, 0x0000000a, 0x00000009, 0x00000002, 0x00040017, + 0x00000013, 0x00000006, 0x00000004, 0x00040017, 
0x00000014, 0x00000006, 0x00000002, 0x000c001e, + 0x00000015, 0x00000013, 0x00000013, 0x00000013, 0x00000014, 0x00000014, 0x0000000a, 0x0000000a, + 0x0000000a, 0x0000000a, 0x0000000a, 0x00040020, 0x00000016, 0x00000009, 0x00000015, 0x0004003b, + 0x00000016, 0x00000017, 0x00000009, 0x0004002b, 0x00000009, 0x00000018, 0x00000008, 0x00040020, + 0x00000019, 0x00000009, 0x0000000a, 0x00020014, 0x0000001d, 0x00040015, 0x0000001e, 0x00000020, + 0x00000000, 0x0004002b, 0x0000001e, 0x0000001f, 0x00000000, 0x0004002b, 0x00000009, 0x00000023, + 0x00000000, 0x0004002b, 0x0000001e, 0x00000027, 0x00000001, 0x0004002b, 0x00000009, 0x00000030, + 0x00000009, 0x00040020, 0x00000031, 0x00000009, 0x00000009, 0x00090019, 0x00000042, 0x00000006, + 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x0003001b, 0x00000043, + 0x00000042, 0x00040020, 0x00000044, 0x00000000, 0x00000043, 0x0004003b, 0x00000044, 0x00000045, + 0x00000000, 0x0004002b, 0x00000006, 0x00000049, 0x3f000000, 0x0004002b, 0x00000006, 0x00000050, + 0x00000000, 0x0004002b, 0x0000001e, 0x00000055, 0x00000003, 0x00040017, 0x0000005f, 0x0000001e, + 0x00000003, 0x00040020, 0x00000060, 0x00000001, 0x0000005f, 0x0004003b, 0x00000060, 0x00000061, + 0x00000001, 0x00040017, 0x00000062, 0x0000001e, 0x00000002, 0x0004002b, 0x00000009, 0x00000068, + 0x00000007, 0x0004002b, 0x00000006, 0x0000007a, 0x3f800000, 0x0004003b, 0x00000044, 0x0000007b, + 0x00000000, 0x0004002b, 0x00000009, 0x00000083, 0x00000006, 0x0004002b, 0x00000009, 0x0000008c, + 0x00000005, 0x00040020, 0x000000a9, 0x00000009, 0x00000013, 0x00040020, 0x000000af, 0x00000009, + 0x00000006, 0x00090019, 0x000000b3, 0x00000006, 0x00000001, 0x00000000, 0x00000000, 0x00000000, + 0x00000002, 0x0000000f, 0x00040020, 0x000000b4, 0x00000000, 0x000000b3, 0x0004003b, 0x000000b4, + 0x000000b5, 0x00000000, 0x0004002b, 0x00000009, 0x000000b9, 0x00000003, 0x0004002b, 0x00000009, + 0x000000c3, 0x00000001, 0x0005002c, 0x0000000a, 0x000000e2, 0x000000c3, 0x00000023, 
0x0004002b, + 0x00000009, 0x000000f7, 0x00000002, 0x00090019, 0x00000100, 0x00000006, 0x00000001, 0x00000000, + 0x00000000, 0x00000000, 0x00000002, 0x0000000d, 0x00040020, 0x00000101, 0x00000000, 0x00000100, + 0x0004003b, 0x00000101, 0x00000102, 0x00000000, 0x0004002b, 0x00000009, 0x00000108, 0x00000004, + 0x0004002b, 0x0000001e, 0x00000117, 0x00000010, 0x0006002c, 0x0000005f, 0x00000118, 0x00000117, + 0x00000117, 0x00000027, 0x0005002c, 0x00000014, 0x000001a8, 0x0000007a, 0x0000007a, 0x0005002c, + 0x00000014, 0x000001a9, 0x00000049, 0x00000049, 0x0005002c, 0x0000000a, 0x000001aa, 0x000000c3, + 0x000000c3, 0x00050036, 0x00000002, 0x00000004, 0x00000000, 0x00000003, 0x000200f8, 0x00000005, + 0x000300f7, 0x00000119, 0x00000000, 0x000300fb, 0x0000001f, 0x0000011a, 0x000200f8, 0x0000011a, + 0x0004003d, 0x0000005f, 0x00000063, 0x00000061, 0x0007004f, 0x00000062, 0x00000064, 0x00000063, + 0x00000063, 0x00000000, 0x00000001, 0x0004007c, 0x0000000a, 0x00000065, 0x00000064, 0x00050051, + 0x00000009, 0x00000067, 0x00000065, 0x00000000, 0x00060041, 0x00000031, 0x00000069, 0x00000017, + 0x00000068, 0x0000001f, 0x0004003d, 0x00000009, 0x0000006a, 0x00000069, 0x000500af, 0x0000001d, + 0x0000006b, 0x00000067, 0x0000006a, 0x000400a8, 0x0000001d, 0x0000006c, 0x0000006b, 0x000300f7, + 0x0000006e, 0x00000000, 0x000400fa, 0x0000006c, 0x0000006d, 0x0000006e, 0x000200f8, 0x0000006d, + 0x00050051, 0x00000009, 0x00000070, 0x00000065, 0x00000001, 0x00060041, 0x00000031, 0x00000071, + 0x00000017, 0x00000068, 0x00000027, 0x0004003d, 0x00000009, 0x00000072, 0x00000071, 0x000500af, + 0x0000001d, 0x00000073, 0x00000070, 0x00000072, 0x000200f9, 0x0000006e, 0x000200f8, 0x0000006e, + 0x000700f5, 0x0000001d, 0x00000074, 0x0000006b, 0x0000011a, 0x00000073, 0x0000006d, 0x000300f7, + 0x00000076, 0x00000000, 0x000400fa, 0x00000074, 0x00000075, 0x00000076, 0x000200f8, 0x00000075, + 0x000200f9, 0x00000119, 0x000200f8, 0x00000076, 0x0004003d, 0x00000043, 0x0000007c, 0x0000007b, + 0x00040064, 0x00000042, 
0x0000007d, 0x0000007c, 0x00050067, 0x0000000a, 0x0000007e, 0x0000007d, + 0x00000023, 0x0004006f, 0x00000014, 0x0000007f, 0x0000007e, 0x00050088, 0x00000014, 0x00000081, + 0x000001a8, 0x0000007f, 0x00050041, 0x00000019, 0x00000084, 0x00000017, 0x00000083, 0x0004003d, + 0x0000000a, 0x00000085, 0x00000084, 0x0004006f, 0x00000014, 0x00000086, 0x00000085, 0x00050041, + 0x00000019, 0x00000087, 0x00000017, 0x00000068, 0x0004003d, 0x0000000a, 0x00000088, 0x00000087, + 0x0004006f, 0x00000014, 0x00000089, 0x00000088, 0x00050088, 0x00000014, 0x0000008a, 0x00000086, + 0x00000089, 0x00050041, 0x00000019, 0x0000008d, 0x00000017, 0x0000008c, 0x0004003d, 0x0000000a, + 0x0000008e, 0x0000008d, 0x0004006f, 0x00000014, 0x0000008f, 0x0000008e, 0x0004006f, 0x00000014, + 0x00000091, 0x00000065, 0x00050081, 0x00000014, 0x00000093, 0x00000091, 0x000001a9, 0x00050085, + 0x00000014, 0x00000095, 0x00000093, 0x0000008a, 0x00050081, 0x00000014, 0x00000096, 0x0000008f, + 0x00000095, 0x00050085, 0x00000014, 0x00000098, 0x00000096, 0x00000081, 0x0004003d, 0x00000043, + 0x0000009a, 0x0000007b, 0x00070058, 0x00000013, 0x0000009c, 0x0000009a, 0x00000098, 0x00000002, + 0x00000050, 0x0008004f, 0x00000007, 0x0000009d, 0x0000009c, 0x0000009c, 0x00000000, 0x00000001, + 0x00000002, 0x00060041, 0x00000031, 0x0000009e, 0x00000017, 0x00000030, 0x0000001f, 0x0004003d, + 0x00000009, 0x0000009f, 0x0000009e, 0x000500ad, 0x0000001d, 0x000000a0, 0x0000009f, 0x00000023, + 0x000300f7, 0x000000a2, 0x00000000, 0x000400fa, 0x000000a0, 0x000000a1, 0x000000a2, 0x000200f8, + 0x000000a1, 0x00050041, 0x00000019, 0x00000124, 0x00000017, 0x00000018, 0x0004003d, 0x0000000a, + 0x00000125, 0x00000124, 0x00050082, 0x0000000a, 0x00000126, 0x00000065, 0x00000125, 0x00050051, + 0x00000009, 0x00000128, 0x00000126, 0x00000000, 0x000500af, 0x0000001d, 0x00000129, 0x00000128, + 0x00000023, 0x000300f7, 0x0000012e, 0x00000000, 0x000400fa, 0x00000129, 0x0000012a, 0x0000012e, + 0x000200f8, 0x0000012a, 0x00050051, 0x00000009, 0x0000012c, 
0x00000126, 0x00000001, 0x000500af, + 0x0000001d, 0x0000012d, 0x0000012c, 0x00000023, 0x000200f9, 0x0000012e, 0x000200f8, 0x0000012e, + 0x000700f5, 0x0000001d, 0x0000012f, 0x00000129, 0x000000a1, 0x0000012d, 0x0000012a, 0x000300f7, + 0x00000136, 0x00000000, 0x000400fa, 0x0000012f, 0x00000130, 0x00000136, 0x000200f8, 0x00000130, + 0x000500b1, 0x0000001d, 0x00000135, 0x00000128, 0x0000009f, 0x000200f9, 0x00000136, 0x000200f8, + 0x00000136, 0x000700f5, 0x0000001d, 0x00000137, 0x0000012f, 0x0000012e, 0x00000135, 0x00000130, + 0x000300f7, 0x0000013e, 0x00000000, 0x000400fa, 0x00000137, 0x00000138, 0x0000013e, 0x000200f8, + 0x00000138, 0x00050051, 0x00000009, 0x0000013a, 0x00000126, 0x00000001, 0x00060041, 0x00000031, + 0x0000013b, 0x00000017, 0x00000030, 0x00000027, 0x0004003d, 0x00000009, 0x0000013c, 0x0000013b, + 0x000500b1, 0x0000001d, 0x0000013d, 0x0000013a, 0x0000013c, 0x000200f9, 0x0000013e, 0x000200f8, + 0x0000013e, 0x000700f5, 0x0000001d, 0x0000013f, 0x00000137, 0x00000136, 0x0000013d, 0x00000138, + 0x000300f7, 0x00000152, 0x00000000, 0x000400fa, 0x0000013f, 0x00000140, 0x00000152, 0x000200f8, + 0x00000140, 0x0004003d, 0x00000043, 0x00000141, 0x00000045, 0x0004006f, 0x00000014, 0x00000143, + 0x00000126, 0x00050081, 0x00000014, 0x00000145, 0x00000143, 0x000001a9, 0x00050041, 0x00000019, + 0x00000146, 0x00000017, 0x00000030, 0x0004003d, 0x0000000a, 0x00000147, 0x00000146, 0x0004006f, + 0x00000014, 0x00000148, 0x00000147, 0x00050088, 0x00000014, 0x00000149, 0x00000145, 0x00000148, + 0x00070058, 0x00000013, 0x0000014a, 0x00000141, 0x00000149, 0x00000002, 0x00000050, 0x0008004f, + 0x00000007, 0x0000014d, 0x0000014a, 0x0000014a, 0x00000002, 0x00000001, 0x00000000, 0x00050051, + 0x00000006, 0x0000014f, 0x0000014a, 0x00000003, 0x00060050, 0x00000007, 0x00000150, 0x0000014f, + 0x0000014f, 0x0000014f, 0x0008000c, 0x00000007, 0x00000151, 0x00000001, 0x0000002e, 0x0000009d, + 0x0000014d, 0x00000150, 0x000200f9, 0x00000152, 0x000200f8, 0x00000152, 0x000700f5, 0x00000007, + 
0x0000019b, 0x0000009d, 0x0000013e, 0x00000151, 0x00000140, 0x000200f9, 0x000000a2, 0x000200f8, + 0x000000a2, 0x000700f5, 0x00000007, 0x0000019c, 0x0000009d, 0x00000076, 0x0000019b, 0x00000152, + 0x00050041, 0x000000a9, 0x000000aa, 0x00000017, 0x00000023, 0x0004003d, 0x00000013, 0x000000ab, + 0x000000aa, 0x0008004f, 0x00000007, 0x000000ac, 0x000000ab, 0x000000ab, 0x00000000, 0x00000001, + 0x00000002, 0x00050094, 0x00000006, 0x000000ae, 0x000000ac, 0x0000019c, 0x00060041, 0x000000af, + 0x000000b0, 0x00000017, 0x00000023, 0x00000055, 0x0004003d, 0x00000006, 0x000000b1, 0x000000b0, + 0x00050081, 0x00000006, 0x000000b2, 0x000000ae, 0x000000b1, 0x0004003d, 0x000000b3, 0x000000b6, + 0x000000b5, 0x00060041, 0x000000af, 0x000000ba, 0x00000017, 0x000000b9, 0x0000001f, 0x0004003d, + 0x00000006, 0x000000bb, 0x000000ba, 0x00050085, 0x00000006, 0x000000bc, 0x000000b2, 0x000000bb, + 0x00060041, 0x000000af, 0x000000bd, 0x00000017, 0x000000b9, 0x00000027, 0x0004003d, 0x00000006, + 0x000000be, 0x000000bd, 0x00050081, 0x00000006, 0x000000bf, 0x000000bc, 0x000000be, 0x00070050, + 0x00000013, 0x000000c0, 0x000000bf, 0x00000050, 0x00000050, 0x00000050, 0x00040063, 0x000000b6, + 0x00000065, 0x000000c0, 0x000500c7, 0x00000009, 0x000000c4, 0x00000067, 0x000000c3, 0x000500aa, + 0x0000001d, 0x000000c5, 0x000000c4, 0x00000023, 0x000300f7, 0x000000c7, 0x00000000, 0x000400fa, + 0x000000c5, 0x000000c6, 0x000000c7, 0x000200f8, 0x000000c6, 0x00050051, 0x00000009, 0x000000c9, + 0x00000065, 0x00000001, 0x000500c7, 0x00000009, 0x000000ca, 0x000000c9, 0x000000c3, 0x000500aa, + 0x0000001d, 0x000000cb, 0x000000ca, 0x00000023, 0x000200f9, 0x000000c7, 0x000200f8, 0x000000c7, + 0x000700f5, 0x0000001d, 0x000000cc, 0x000000c5, 0x000000a2, 0x000000cb, 0x000000c6, 0x000300f7, + 0x000000ce, 0x00000000, 0x000400fa, 0x000000cc, 0x000000cd, 0x000000ce, 0x000200f8, 0x000000cd, + 0x00050085, 0x00000014, 0x000000d2, 0x0000008a, 0x00000081, 0x0004003d, 0x00000043, 0x000000d4, + 0x0000007b, 0x00050051, 0x00000006, 
0x000000d7, 0x000000d2, 0x00000000, 0x00050050, 0x00000014, + 0x000000d8, 0x000000d7, 0x00000050, 0x00050081, 0x00000014, 0x000000d9, 0x00000098, 0x000000d8, + 0x00070058, 0x00000013, 0x000000da, 0x000000d4, 0x000000d9, 0x00000002, 0x00000050, 0x0008004f, + 0x00000007, 0x000000db, 0x000000da, 0x000000da, 0x00000000, 0x00000001, 0x00000002, 0x000300f7, + 0x000000e0, 0x00000000, 0x000400fa, 0x000000a0, 0x000000df, 0x000000e0, 0x000200f8, 0x000000df, + 0x00050080, 0x0000000a, 0x000000e3, 0x00000065, 0x000000e2, 0x00050041, 0x00000019, 0x00000159, + 0x00000017, 0x00000018, 0x0004003d, 0x0000000a, 0x0000015a, 0x00000159, 0x00050082, 0x0000000a, + 0x0000015b, 0x000000e3, 0x0000015a, 0x00050051, 0x00000009, 0x0000015d, 0x0000015b, 0x00000000, + 0x000500af, 0x0000001d, 0x0000015e, 0x0000015d, 0x00000023, 0x000300f7, 0x00000163, 0x00000000, + 0x000400fa, 0x0000015e, 0x0000015f, 0x00000163, 0x000200f8, 0x0000015f, 0x00050051, 0x00000009, + 0x00000161, 0x0000015b, 0x00000001, 0x000500af, 0x0000001d, 0x00000162, 0x00000161, 0x00000023, + 0x000200f9, 0x00000163, 0x000200f8, 0x00000163, 0x000700f5, 0x0000001d, 0x00000164, 0x0000015e, + 0x000000df, 0x00000162, 0x0000015f, 0x000300f7, 0x0000016b, 0x00000000, 0x000400fa, 0x00000164, + 0x00000165, 0x0000016b, 0x000200f8, 0x00000165, 0x000500b1, 0x0000001d, 0x0000016a, 0x0000015d, + 0x0000009f, 0x000200f9, 0x0000016b, 0x000200f8, 0x0000016b, 0x000700f5, 0x0000001d, 0x0000016c, + 0x00000164, 0x00000163, 0x0000016a, 0x00000165, 0x000300f7, 0x00000173, 0x00000000, 0x000400fa, + 0x0000016c, 0x0000016d, 0x00000173, 0x000200f8, 0x0000016d, 0x00050051, 0x00000009, 0x0000016f, + 0x0000015b, 0x00000001, 0x00060041, 0x00000031, 0x00000170, 0x00000017, 0x00000030, 0x00000027, + 0x0004003d, 0x00000009, 0x00000171, 0x00000170, 0x000500b1, 0x0000001d, 0x00000172, 0x0000016f, + 0x00000171, 0x000200f9, 0x00000173, 0x000200f8, 0x00000173, 0x000700f5, 0x0000001d, 0x00000174, + 0x0000016c, 0x0000016b, 0x00000172, 0x0000016d, 0x000300f7, 0x00000187, 
0x00000000, 0x000400fa, + 0x00000174, 0x00000175, 0x00000187, 0x000200f8, 0x00000175, 0x0004003d, 0x00000043, 0x00000176, + 0x00000045, 0x0004006f, 0x00000014, 0x00000178, 0x0000015b, 0x00050081, 0x00000014, 0x0000017a, + 0x00000178, 0x000001a9, 0x00050041, 0x00000019, 0x0000017b, 0x00000017, 0x00000030, 0x0004003d, + 0x0000000a, 0x0000017c, 0x0000017b, 0x0004006f, 0x00000014, 0x0000017d, 0x0000017c, 0x00050088, + 0x00000014, 0x0000017e, 0x0000017a, 0x0000017d, 0x00070058, 0x00000013, 0x0000017f, 0x00000176, + 0x0000017e, 0x00000002, 0x00000050, 0x0008004f, 0x00000007, 0x00000182, 0x0000017f, 0x0000017f, + 0x00000002, 0x00000001, 0x00000000, 0x00050051, 0x00000006, 0x00000184, 0x0000017f, 0x00000003, + 0x00060050, 0x00000007, 0x00000185, 0x00000184, 0x00000184, 0x00000184, 0x0008000c, 0x00000007, + 0x00000186, 0x00000001, 0x0000002e, 0x000000db, 0x00000182, 0x00000185, 0x000200f9, 0x00000187, + 0x000200f8, 0x00000187, 0x000700f5, 0x00000007, 0x000001a0, 0x000000db, 0x00000173, 0x00000186, + 0x00000175, 0x000200f9, 0x000000e0, 0x000200f8, 0x000000e0, 0x000700f5, 0x00000007, 0x000001a7, + 0x000000db, 0x000000cd, 0x000001a0, 0x00000187, 0x00050081, 0x00000007, 0x000000eb, 0x0000019c, + 0x000001a7, 0x0005008e, 0x00000007, 0x000000ec, 0x000000eb, 0x00000049, 0x00050041, 0x000000a9, + 0x000000ee, 0x00000017, 0x000000c3, 0x0004003d, 0x00000013, 0x000000ef, 0x000000ee, 0x0008004f, + 0x00000007, 0x000000f0, 0x000000ef, 0x000000ef, 0x00000000, 0x00000001, 0x00000002, 0x00050094, + 0x00000006, 0x000000f2, 0x000000f0, 0x000000ec, 0x00060041, 0x000000af, 0x000000f3, 0x00000017, + 0x000000c3, 0x00000055, 0x0004003d, 0x00000006, 0x000000f4, 0x000000f3, 0x00050081, 0x00000006, + 0x000000f5, 0x000000f2, 0x000000f4, 0x00050041, 0x000000a9, 0x000000f8, 0x00000017, 0x000000f7, + 0x0004003d, 0x00000013, 0x000000f9, 0x000000f8, 0x0008004f, 0x00000007, 0x000000fa, 0x000000f9, + 0x000000f9, 0x00000000, 0x00000001, 0x00000002, 0x00050094, 0x00000006, 0x000000fc, 0x000000fa, + 0x000000ec, 
0x00060041, 0x000000af, 0x000000fd, 0x00000017, 0x000000f7, 0x00000055, 0x0004003d, + 0x00000006, 0x000000fe, 0x000000fd, 0x00050081, 0x00000006, 0x000000ff, 0x000000fc, 0x000000fe, + 0x0004003d, 0x00000100, 0x00000103, 0x00000102, 0x000500c3, 0x0000000a, 0x00000106, 0x00000065, + 0x000001aa, 0x00060041, 0x000000af, 0x00000109, 0x00000017, 0x00000108, 0x0000001f, 0x0004003d, + 0x00000006, 0x0000010a, 0x00000109, 0x00050085, 0x00000006, 0x0000010b, 0x000000f5, 0x0000010a, + 0x00060041, 0x000000af, 0x0000010c, 0x00000017, 0x00000108, 0x00000027, 0x0004003d, 0x00000006, + 0x0000010d, 0x0000010c, 0x00050081, 0x00000006, 0x0000010e, 0x0000010b, 0x0000010d, 0x00050085, + 0x00000006, 0x00000112, 0x000000ff, 0x0000010a, 0x00050081, 0x00000006, 0x00000115, 0x00000112, + 0x0000010d, 0x00070050, 0x00000013, 0x00000116, 0x0000010e, 0x00000115, 0x00000050, 0x00000050, + 0x00040063, 0x00000103, 0x00000106, 0x00000116, 0x000200f9, 0x000000ce, 0x000200f8, 0x000000ce, + 0x000200f9, 0x00000119, 0x000200f8, 0x00000119, 0x000100fd, 0x00010038 +}; + +static const size_t rgb2nv12_comp_spv_size = sizeof(rgb2nv12_comp_spv); diff --git a/src/platform/linux/vulkan_encode.cpp b/src/platform/linux/vulkan_encode.cpp new file mode 100644 index 00000000000..2f43ee34b9b --- /dev/null +++ b/src/platform/linux/vulkan_encode.cpp @@ -0,0 +1,842 @@ +/** + * @file src/platform/linux/vulkan_encode.cpp + * @brief Vulkan-native encoder: DMA-BUF → Vulkan compute (RGB→NV12) → Vulkan Video encode. + * No EGL/GL dependency — all GPU work stays in a single Vulkan queue. + */ +#include +#include +#include +#include + +extern "C" { +#include +#include +#include +} + +#include + +#include "vulkan_encode.h" +#include "graphics.h" +#include "src/config.h" +#include "src/logging.h" +#include "src/video_colorspace.h" +#include "shaders/rgb2nv12.spv.h" + +using namespace std::literals; + +namespace vk { + + // Match a DRI render node path to a Vulkan device index via VK_EXT_physical_device_drm. 
+ // Returns the index as a string (e.g. "1"), or empty string if no match. + static std::string find_vulkan_index_for_render_node(const char *render_path) { + struct stat node_stat; + if (stat(render_path, &node_stat) < 0) return {}; + + auto target_major = major(node_stat.st_rdev); + auto target_minor = minor(node_stat.st_rdev); + + VkApplicationInfo app = {VK_STRUCTURE_TYPE_APPLICATION_INFO}; + app.apiVersion = VK_API_VERSION_1_1; + VkInstanceCreateInfo ci = {VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO}; + ci.pApplicationInfo = &app; + VkInstance inst = VK_NULL_HANDLE; + if (vkCreateInstance(&ci, nullptr, &inst) != VK_SUCCESS) return {}; + + uint32_t count = 0; + vkEnumeratePhysicalDevices(inst, &count, nullptr); + std::vector devs(count); + vkEnumeratePhysicalDevices(inst, &count, devs.data()); + + std::string result; + for (uint32_t i = 0; i < count; i++) { + VkPhysicalDeviceDrmPropertiesEXT drm = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT}; + VkPhysicalDeviceProperties2 props2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; + props2.pNext = &drm; + vkGetPhysicalDeviceProperties2(devs[i], &props2); + if (drm.hasRender && drm.renderMajor == (int64_t)target_major && drm.renderMinor == (int64_t)target_minor) { + result = std::to_string(i); + break; + } + } + vkDestroyInstance(inst, nullptr); + return result; + } + + static int create_vulkan_hwdevice(AVBufferRef **hw_device_buf) { + // Resolve render device path to Vulkan device index + auto render_path = config::video.adapter_name.empty() ? 
"/dev/dri/renderD128" : config::video.adapter_name; + if (render_path[0] == '/') { + auto idx = find_vulkan_index_for_render_node(render_path.c_str()); + if (!idx.empty()) { + if (av_hwdevice_ctx_create(hw_device_buf, AV_HWDEVICE_TYPE_VULKAN, idx.c_str(), nullptr, 0) >= 0) + return 0; + } + } else { + // Non-path: treat as device name substring or numeric index + if (av_hwdevice_ctx_create(hw_device_buf, AV_HWDEVICE_TYPE_VULKAN, render_path.c_str(), nullptr, 0) >= 0) + return 0; + } + // Final fallback: let FFmpeg pick default + if (av_hwdevice_ctx_create(hw_device_buf, AV_HWDEVICE_TYPE_VULKAN, nullptr, nullptr, 0) >= 0) + return 0; + return -1; + } + + struct PushConstants { + float color_vec_y[4]; + float color_vec_u[4]; + float color_vec_v[4]; + float range_y[2]; + float range_uv[2]; + int32_t src_offset[2]; + int32_t src_size[2]; + int32_t dst_size[2]; + int32_t cursor_pos[2]; + int32_t cursor_size[2]; + }; + + // Helper to check VkResult + #define VK_CHECK(expr) do { VkResult _r = (expr); if (_r != VK_SUCCESS) { \ + BOOST_LOG(error) << #expr << " failed: " << _r; return -1; } } while(0) + #define VK_CHECK_BOOL(expr) do { VkResult _r = (expr); if (_r != VK_SUCCESS) { \ + BOOST_LOG(error) << #expr << " failed: " << _r; return false; } } while(0) + + class vk_vram_t: public platf::avcodec_encode_device_t { + public: + ~vk_vram_t() { + cleanup_pipeline(); + } + + int init(int in_width, int in_height, int in_offset_x = 0, int in_offset_y = 0) { + width = in_width; + height = in_height; + offset_x = in_offset_x; + offset_y = in_offset_y; + this->data = (void *) init_hw_device; + return 0; + } + + int set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx_buf) override { + this->hwframe.reset(frame); + this->frame = frame; + this->hw_frames_ctx = hw_frames_ctx_buf; + + auto *frames_ctx = (AVHWFramesContext *) hw_frames_ctx_buf->data; + auto *dev_ctx = (AVHWDeviceContext *) frames_ctx->device_ref->data; + vk_dev_ctx = (AVVulkanDeviceContext *) dev_ctx->hwctx; + dev = 
vk_dev_ctx->act_dev; + phys_dev = vk_dev_ctx->phys_dev; + + { + VkPhysicalDeviceProperties p; + vkGetPhysicalDeviceProperties(phys_dev, &p); + BOOST_LOG(info) << "Vulkan encode using GPU: " << p.deviceName; + } + + // Find a compute-capable queue family from FFmpeg's context + compute_qf = -1; + for (int i = 0; i < vk_dev_ctx->nb_qf; i++) { + if (vk_dev_ctx->qf[i].flags & VK_QUEUE_COMPUTE_BIT) { + compute_qf = vk_dev_ctx->qf[i].idx; + break; + } + } + if (compute_qf < 0) { + BOOST_LOG(error) << "No compute queue family in Vulkan device"sv; + return -1; + } + + vkGetDeviceQueue(dev, compute_qf, 0, &compute_queue); + + // Load extension functions + vkGetMemoryFdPropertiesKHR_fn = (PFN_vkGetMemoryFdPropertiesKHR) + vkGetDeviceProcAddr(dev, "vkGetMemoryFdPropertiesKHR"); + + if (!create_compute_pipeline()) return -1; + if (!create_command_resources()) return -1; + + return 0; + } + + void apply_colorspace() override { + auto *colors = video::color_vectors_from_colorspace(colorspace, true); + if (colors) { + memcpy(push.color_vec_y, colors->color_vec_y, sizeof(push.color_vec_y)); + memcpy(push.color_vec_u, colors->color_vec_u, sizeof(push.color_vec_u)); + memcpy(push.color_vec_v, colors->color_vec_v, sizeof(push.color_vec_v)); + memcpy(push.range_y, colors->range_y, sizeof(push.range_y)); + memcpy(push.range_uv, colors->range_uv, sizeof(push.range_uv)); + } + } + + void init_hwframes(AVHWFramesContext *frames) override { + frames->initial_pool_size = 4; + auto *vk_frames = (AVVulkanFramesContext *)frames->hwctx; + vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; + vk_frames->usage = (VkImageUsageFlagBits)( + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT); + } + + int convert(platf::img_t &img) override { + auto &descriptor = (egl::img_descriptor_t &) img; + + // Get encoder target frame + if (!frame->buf[0]) { + if (av_hwframe_get_buffer(hw_frames_ctx, frame, 0) < 0) { + BOOST_LOG(error) << "Failed to get Vulkan frame"sv; + 
return -1; + } + } + + // Import new DMA-BUF as VkImage when capture sequence changes + if (descriptor.sequence == 0) { + // Dummy frame — clear the target + return 0; + } + + if (descriptor.sequence > sequence) { + sequence = descriptor.sequence; + if (!import_dmabuf(descriptor.sd)) { + BOOST_LOG(error) << "Failed to import DMA-BUF"sv; + return -1; + } + descriptors_dirty = true; + } + + if (src.image == VK_NULL_HANDLE) return -1; + + // Setup Y/UV image views for the encoder target (once) + if (!target_views_created) { + if (!create_target_views()) return -1; + target_views_created = true; + descriptors_dirty = true; + } + + // Update descriptor set only when source or target changed + if (descriptors_dirty) { + update_descriptors(); + descriptors_dirty = false; + } + + if (descriptor.data && descriptor.serial != cursor_serial) { + cursor_serial = descriptor.serial; + if (!create_cursor_image(descriptor.src_w, descriptor.src_h, descriptor.data)) + return -1; + update_descriptors(); + descriptors_dirty = false; + } + + // Fill push constants + push.src_offset[0] = offset_x; + push.src_offset[1] = offset_y; + push.src_size[0] = width; + push.src_size[1] = height; + push.dst_size[0] = frame->width; + push.dst_size[1] = frame->height; + + if (descriptor.data) { + float scale_x = (float)frame->width / width; + float scale_y = (float)frame->height / height; + push.cursor_pos[0] = (int32_t)((descriptor.x - offset_x) * scale_x); + push.cursor_pos[1] = (int32_t)((descriptor.y - offset_y) * scale_y); + push.cursor_size[0] = (int32_t)(descriptor.width * scale_x); + push.cursor_size[1] = (int32_t)(descriptor.height * scale_y); + } else { + push.cursor_size[0] = 0; + } + + // Record and submit compute dispatch + return dispatch_compute(); + } + + private: + bool create_compute_pipeline() { + // Shader module + VkShaderModuleCreateInfo shader_ci = {VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO}; + shader_ci.codeSize = rgb2nv12_comp_spv_size; + shader_ci.pCode = 
rgb2nv12_comp_spv; + VK_CHECK_BOOL(vkCreateShaderModule(dev, &shader_ci, nullptr, &shader_module)); + + // Descriptor set layout: binding 0=sampler, 1=Y storage, 2=UV storage, 3=cursor sampler + VkDescriptorSetLayoutBinding bindings[4] = {}; + bindings[0] = {0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}; + bindings[1] = {1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}; + bindings[2] = {2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}; + bindings[3] = {3, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}; + + VkDescriptorSetLayoutCreateInfo ds_layout_ci = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + ds_layout_ci.bindingCount = 4; + ds_layout_ci.pBindings = bindings; + VK_CHECK_BOOL(vkCreateDescriptorSetLayout(dev, &ds_layout_ci, nullptr, &ds_layout)); + + // Push constant range + VkPushConstantRange pc_range = {VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(PushConstants)}; + + VkPipelineLayoutCreateInfo pl_ci = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + pl_ci.setLayoutCount = 1; + pl_ci.pSetLayouts = &ds_layout; + pl_ci.pushConstantRangeCount = 1; + pl_ci.pPushConstantRanges = &pc_range; + VK_CHECK_BOOL(vkCreatePipelineLayout(dev, &pl_ci, nullptr, &pipeline_layout)); + + // Compute pipeline + VkComputePipelineCreateInfo comp_ci = {VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO}; + comp_ci.stage = {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO}; + comp_ci.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT; + comp_ci.stage.module = shader_module; + comp_ci.stage.pName = "main"; + comp_ci.layout = pipeline_layout; + VK_CHECK_BOOL(vkCreateComputePipelines(dev, VK_NULL_HANDLE, 1, &comp_ci, nullptr, &pipeline)); + + // Descriptor pool + VkDescriptorPoolSize pool_sizes[] = { + {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 2}, + {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2}, + }; + VkDescriptorPoolCreateInfo pool_ci = 
{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; + pool_ci.maxSets = 1; + pool_ci.poolSizeCount = 2; + pool_ci.pPoolSizes = pool_sizes; + VK_CHECK_BOOL(vkCreateDescriptorPool(dev, &pool_ci, nullptr, &desc_pool)); + + VkDescriptorSetAllocateInfo alloc_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; + alloc_info.descriptorPool = desc_pool; + alloc_info.descriptorSetCount = 1; + alloc_info.pSetLayouts = &ds_layout; + VK_CHECK_BOOL(vkAllocateDescriptorSets(dev, &alloc_info, &desc_set)); + + // Sampler for source image + VkSamplerCreateInfo sampler_ci = {VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO}; + sampler_ci.magFilter = VK_FILTER_LINEAR; + sampler_ci.minFilter = VK_FILTER_LINEAR; + sampler_ci.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampler_ci.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + VK_CHECK_BOOL(vkCreateSampler(dev, &sampler_ci, nullptr, &sampler)); + + if (!create_cursor_image(1, 1, nullptr)) return false; + + return true; + } + + bool create_command_resources() { + VkCommandPoolCreateInfo pool_ci = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO}; + pool_ci.queueFamilyIndex = compute_qf; + pool_ci.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + VK_CHECK_BOOL(vkCreateCommandPool(dev, &pool_ci, nullptr, &cmd_pool)); + + VkCommandBufferAllocateInfo alloc_ci = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO}; + alloc_ci.commandPool = cmd_pool; + alloc_ci.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + alloc_ci.commandBufferCount = CMD_RING_SIZE; + VK_CHECK_BOOL(vkAllocateCommandBuffers(dev, &alloc_ci, cmd_ring)); + + return true; + } + + static VkFormat drm_fourcc_to_vk_format(uint32_t fourcc) { + switch (fourcc) { + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: return VK_FORMAT_B8G8R8A8_UNORM; + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: return VK_FORMAT_R8G8B8A8_UNORM; + case DRM_FORMAT_XRGB2101010: + case DRM_FORMAT_ARGB2101010: return VK_FORMAT_A2R10G10B10_UNORM_PACK32; + case DRM_FORMAT_XBGR2101010: + case 
DRM_FORMAT_ABGR2101010: return VK_FORMAT_A2B10G10R10_UNORM_PACK32; + default: + BOOST_LOG(warning) << "Unknown DRM fourcc 0x" << std::hex << fourcc << std::dec << ", assuming B8G8R8A8"; + return VK_FORMAT_B8G8R8A8_UNORM; + } + } + + bool import_dmabuf(const egl::surface_descriptor_t &sd) { + destroy_src_image(); + + int fd = dup(sd.fds[0]); + if (fd < 0) return false; + + // Query memory requirements for this DMA-BUF + VkMemoryFdPropertiesKHR fd_props = {VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR}; + if (vkGetMemoryFdPropertiesKHR_fn) { + vkGetMemoryFdPropertiesKHR_fn(dev, VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, fd, &fd_props); + } + + // Create VkImage for the DMA-BUF + VkExternalMemoryImageCreateInfo ext_ci = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO}; + ext_ci.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + + VkSubresourceLayout drm_layout = {}; + VkImageDrmFormatModifierExplicitCreateInfoEXT drm_ci = { + VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT}; + VkImageTiling tiling; + + if (sd.modifier != DRM_FORMAT_MOD_INVALID) { + drm_layout.offset = sd.offsets[0]; + drm_layout.rowPitch = sd.pitches[0]; + drm_ci.drmFormatModifier = sd.modifier; + drm_ci.drmFormatModifierPlaneCount = 1; + drm_ci.pPlaneLayouts = &drm_layout; + ext_ci.pNext = &drm_ci; + tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT; + } else { + tiling = VK_IMAGE_TILING_LINEAR; + } + + auto vk_format = drm_fourcc_to_vk_format(sd.fourcc); + + VkImageCreateInfo img_ci = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO}; + img_ci.pNext = &ext_ci; + img_ci.imageType = VK_IMAGE_TYPE_2D; + img_ci.format = vk_format; + img_ci.extent = {(uint32_t)sd.width, (uint32_t)sd.height, 1}; + img_ci.mipLevels = 1; + img_ci.arrayLayers = 1; + img_ci.samples = VK_SAMPLE_COUNT_1_BIT; + img_ci.tiling = tiling; + img_ci.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + img_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + auto res = vkCreateImage(dev, &img_ci, nullptr, &src.image); + if (res 
!= VK_SUCCESS) { + close(fd); + BOOST_LOG(error) << "vkCreateImage for DMA-BUF failed: " << res + << " (modifier=0x" << std::hex << sd.modifier << std::dec + << ", pitch=" << sd.pitches[0] << ", offset=" << sd.offsets[0] << ")"; + return false; + } + + // Bind imported DMA-BUF memory + VkMemoryRequirements mem_req; + vkGetImageMemoryRequirements(dev, src.image, &mem_req); + + VkImportMemoryFdInfoKHR import_fd = {VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR}; + import_fd.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + import_fd.fd = fd; // Vulkan takes ownership + + VkMemoryAllocateInfo alloc_info = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; + alloc_info.pNext = &import_fd; + alloc_info.allocationSize = mem_req.size; + alloc_info.memoryTypeIndex = find_memory_type( + fd_props.memoryTypeBits ? fd_props.memoryTypeBits : mem_req.memoryTypeBits, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + VkDeviceMemory src_mem = VK_NULL_HANDLE; + res = vkAllocateMemory(dev, &alloc_info, nullptr, &src_mem); + if (res != VK_SUCCESS) { + BOOST_LOG(error) << "vkAllocateMemory for DMA-BUF failed: " << res; + vkDestroyImage(dev, src.image, nullptr); + src.image = VK_NULL_HANDLE; + return false; + } + + vkBindImageMemory(dev, src.image, src_mem, 0); + + // Create image view (Vulkan sampling always returns RGBA order regardless of memory layout) + VkImageViewCreateInfo view_ci = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; + view_ci.image = src.image; + view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_ci.format = vk_format; + view_ci.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + VK_CHECK_BOOL(vkCreateImageView(dev, &view_ci, nullptr, &src.view)); + + src.mem = src_mem; + return true; + } + + bool create_cursor_image(int w, int h, const uint8_t *pixels) { + destroy_cursor_image(); + + VkImageCreateInfo img_ci = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO}; + img_ci.imageType = VK_IMAGE_TYPE_2D; + img_ci.format = VK_FORMAT_B8G8R8A8_UNORM; + img_ci.extent = {(uint32_t)w, 
(uint32_t)h, 1}; + img_ci.mipLevels = 1; + img_ci.arrayLayers = 1; + img_ci.samples = VK_SAMPLE_COUNT_1_BIT; + img_ci.tiling = VK_IMAGE_TILING_LINEAR; + img_ci.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + img_ci.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + VK_CHECK_BOOL(vkCreateImage(dev, &img_ci, nullptr, &cursor.image)); + + VkMemoryRequirements mem_req; + vkGetImageMemoryRequirements(dev, cursor.image, &mem_req); + VkMemoryAllocateInfo alloc = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; + alloc.allocationSize = mem_req.size; + alloc.memoryTypeIndex = find_memory_type(mem_req.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + VK_CHECK_BOOL(vkAllocateMemory(dev, &alloc, nullptr, &cursor.mem)); + VK_CHECK_BOOL(vkBindImageMemory(dev, cursor.image, cursor.mem, 0)); + + if (pixels) { + void *mapped; + VK_CHECK_BOOL(vkMapMemory(dev, cursor.mem, 0, VK_WHOLE_SIZE, 0, &mapped)); + VkImageSubresource subres = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0}; + VkSubresourceLayout layout; + vkGetImageSubresourceLayout(dev, cursor.image, &subres, &layout); + for (int y = 0; y < h; y++) + memcpy((uint8_t *)mapped + layout.offset + y * layout.rowPitch, pixels + y * w * 4, w * 4); + vkUnmapMemory(dev, cursor.mem); + } + + VkImageViewCreateInfo view_ci = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; + view_ci.image = cursor.image; + view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_ci.format = VK_FORMAT_B8G8R8A8_UNORM; + view_ci.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + VK_CHECK_BOOL(vkCreateImageView(dev, &view_ci, nullptr, &cursor.view)); + + cursor.needs_transition = true; + descriptors_dirty = true; + return true; + } + + void destroy_cursor_image() { + if (cursor.view) { vkDestroyImageView(dev, cursor.view, nullptr); cursor.view = VK_NULL_HANDLE; } + if (cursor.image) { vkDestroyImage(dev, cursor.image, nullptr); cursor.image = VK_NULL_HANDLE; } + if (cursor.mem) { vkFreeMemory(dev, cursor.mem, nullptr); cursor.mem = 
VK_NULL_HANDLE; } + } + + bool create_target_views() { + AVVkFrame *vk_frame = (AVVkFrame *) frame->data[0]; + if (!vk_frame) return false; + + // Detect multiplane vs multi-image layout + int num_imgs = 0; + for (int i = 0; i < AV_NUM_DATA_POINTERS && vk_frame->img[i]; i++) num_imgs++; + + if (num_imgs == 1) { + // Single multiplane image — create plane views + VkImageViewCreateInfo view_ci = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; + view_ci.image = vk_frame->img[0]; + view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D; + + // Y plane + view_ci.format = VK_FORMAT_R8_UNORM; + view_ci.subresourceRange = {VK_IMAGE_ASPECT_PLANE_0_BIT, 0, 1, 0, 1}; + VK_CHECK_BOOL(vkCreateImageView(dev, &view_ci, nullptr, &y_view)); + + // UV plane + view_ci.format = VK_FORMAT_R8G8_UNORM; + view_ci.subresourceRange = {VK_IMAGE_ASPECT_PLANE_1_BIT, 0, 1, 0, 1}; + VK_CHECK_BOOL(vkCreateImageView(dev, &view_ci, nullptr, &uv_view)); + } else { + // Separate images per plane + VkImageViewCreateInfo view_ci = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; + view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_ci.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + + view_ci.image = vk_frame->img[0]; + view_ci.format = VK_FORMAT_R8_UNORM; + VK_CHECK_BOOL(vkCreateImageView(dev, &view_ci, nullptr, &y_view)); + + view_ci.image = vk_frame->img[1]; + view_ci.format = VK_FORMAT_R8G8_UNORM; + VK_CHECK_BOOL(vkCreateImageView(dev, &view_ci, nullptr, &uv_view)); + } + return true; + } + + void update_descriptors() { + VkDescriptorImageInfo src_info = {sampler, src.view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo y_info = {VK_NULL_HANDLE, y_view, VK_IMAGE_LAYOUT_GENERAL}; + VkDescriptorImageInfo uv_info = {VK_NULL_HANDLE, uv_view, VK_IMAGE_LAYOUT_GENERAL}; + VkDescriptorImageInfo cursor_info = {sampler, cursor.view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + + VkWriteDescriptorSet writes[4] = {}; + writes[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, desc_set, 0, 0, 1, + 
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &src_info, nullptr, nullptr}; + writes[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, desc_set, 1, 0, 1, + VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &y_info, nullptr, nullptr}; + writes[2] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, desc_set, 2, 0, 1, + VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &uv_info, nullptr, nullptr}; + writes[3] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, desc_set, 3, 0, 1, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &cursor_info, nullptr, nullptr}; + vkUpdateDescriptorSets(dev, 4, writes, 0, nullptr); + } + + int dispatch_compute() { + AVVkFrame *vk_frame = (AVVkFrame *) frame->data[0]; + int num_imgs = 0; + for (int i = 0; i < AV_NUM_DATA_POINTERS && vk_frame->img[i]; i++) num_imgs++; + + // Rotate to next command buffer. With CMD_RING_SIZE slots, the buffer + // we're about to reuse was submitted CMD_RING_SIZE frames ago. + // At 60fps that's ~50ms for a <1ms compute dispatch — always complete. + // No fences, no semaphore waits, no CPU blocking. 
+ auto cmd_buf = cmd_ring[cmd_ring_idx]; + cmd_ring_idx = (cmd_ring_idx + 1) % CMD_RING_SIZE; + + VkCommandBufferBeginInfo begin_ci = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; + begin_ci.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + VK_CHECK(vkBeginCommandBuffer(cmd_buf, &begin_ci)); + + // Transition source image to SHADER_READ_ONLY + VkImageMemoryBarrier src_barrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; + src_barrier.srcAccessMask = 0; + src_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + src_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + src_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + src_barrier.image = src.image; + src_barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + src_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL; + src_barrier.dstQueueFamilyIndex = compute_qf; + + vkCmdPipelineBarrier(cmd_buf, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, 0, nullptr, 0, nullptr, 1, &src_barrier); + + // Transition cursor image if needed + if (cursor.needs_transition) { + VkImageMemoryBarrier cursor_barrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; + cursor_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + cursor_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + cursor_barrier.oldLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + cursor_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + cursor_barrier.image = cursor.image; + cursor_barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + cursor_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + cursor_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + vkCmdPipelineBarrier(cmd_buf, + VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, 0, nullptr, 0, nullptr, 1, &cursor_barrier); + cursor.needs_transition = false; + } + + // Transition target planes to GENERAL for storage writes + VkImageMemoryBarrier dst_barriers[2] = {}; + int num_dst_barriers = (num_imgs == 
1) ? 1 : 2; + for (int i = 0; i < num_dst_barriers; i++) { + dst_barriers[i] = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; + dst_barriers[i].srcAccessMask = target_initialized ? VK_ACCESS_SHADER_READ_BIT : 0; + dst_barriers[i].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + dst_barriers[i].oldLayout = target_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED; + dst_barriers[i].newLayout = VK_IMAGE_LAYOUT_GENERAL; + dst_barriers[i].image = vk_frame->img[num_imgs == 1 ? 0 : i]; + dst_barriers[i].subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + dst_barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + dst_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + } + + vkCmdPipelineBarrier(cmd_buf, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, 0, nullptr, 0, nullptr, num_dst_barriers, dst_barriers); + + // Bind pipeline and dispatch + vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline_layout, 0, 1, &desc_set, 0, nullptr); + vkCmdPushConstants(cmd_buf, pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(PushConstants), &push); + + uint32_t gx = (frame->width + 15) / 16; + uint32_t gy = (frame->height + 15) / 16; + vkCmdDispatch(cmd_buf, gx, gy, 1); + + VK_CHECK(vkEndCommandBuffer(cmd_buf)); + + // Submit with timeline semaphore signaling for FFmpeg + VkTimelineSemaphoreSubmitInfo timeline_info = {VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO}; + VkSemaphore wait_sems[AV_NUM_DATA_POINTERS], signal_sems[AV_NUM_DATA_POINTERS]; + uint64_t wait_vals[AV_NUM_DATA_POINTERS], signal_vals[AV_NUM_DATA_POINTERS]; + VkPipelineStageFlags wait_stages[AV_NUM_DATA_POINTERS]; + int sem_count = 0; + + for (int i = 0; i < AV_NUM_DATA_POINTERS && vk_frame->sem[i]; i++) { + wait_sems[sem_count] = vk_frame->sem[i]; + wait_vals[sem_count] = vk_frame->sem_value[i]; + wait_stages[sem_count] = 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + + signal_sems[sem_count] = vk_frame->sem[i]; + signal_vals[sem_count] = vk_frame->sem_value[i] + 1; + vk_frame->sem_value[i]++; + sem_count++; + } + + timeline_info.waitSemaphoreValueCount = sem_count; + timeline_info.pWaitSemaphoreValues = wait_vals; + timeline_info.signalSemaphoreValueCount = sem_count; + timeline_info.pSignalSemaphoreValues = signal_vals; + + VkSubmitInfo submit = {VK_STRUCTURE_TYPE_SUBMIT_INFO}; + submit.pNext = &timeline_info; + submit.waitSemaphoreCount = sem_count; + submit.pWaitSemaphores = wait_sems; + submit.pWaitDstStageMask = wait_stages; + submit.commandBufferCount = 1; + submit.pCommandBuffers = &cmd_buf; + submit.signalSemaphoreCount = sem_count; + submit.pSignalSemaphores = signal_sems; + + // Lock the queue (FFmpeg requires this) + vk_dev_ctx->lock_queue( + (AVHWDeviceContext *)((AVHWFramesContext *)hw_frames_ctx->data)->device_ref->data, + compute_qf, 0); + auto res = vkQueueSubmit(compute_queue, 1, &submit, VK_NULL_HANDLE); + vk_dev_ctx->unlock_queue( + (AVHWDeviceContext *)((AVHWFramesContext *)hw_frames_ctx->data)->device_ref->data, + compute_qf, 0); + + if (res != VK_SUCCESS) { + BOOST_LOG(error) << "vkQueueSubmit failed: " << res; + return -1; + } + + // Update frame layouts for FFmpeg + for (int i = 0; i < AV_NUM_DATA_POINTERS && vk_frame->img[i]; i++) { + vk_frame->layout[i] = VK_IMAGE_LAYOUT_GENERAL; + vk_frame->access[i] = (VkAccessFlagBits)VK_ACCESS_SHADER_WRITE_BIT; + } + + target_initialized = true; + + return 0; + } + + uint32_t find_memory_type(uint32_t type_bits, VkMemoryPropertyFlags props) { + VkPhysicalDeviceMemoryProperties mem_props; + vkGetPhysicalDeviceMemoryProperties(phys_dev, &mem_props); + for (uint32_t i = 0; i < mem_props.memoryTypeCount; i++) { + if ((type_bits & (1 << i)) && (mem_props.memoryTypes[i].propertyFlags & props) == props) + return i; + } + // Fallback: any matching type bit + for (uint32_t i = 0; i < mem_props.memoryTypeCount; i++) { + if (type_bits & 
(1 << i)) return i; + } + return 0; + } + + void destroy_src_image() { + if (src.image) { + // Defer destruction — the GPU may still be using this image. + // By the time we wrap around (4 frames later), it's guaranteed done. + auto &slot = defer_ring[defer_idx]; + if (slot.view) vkDestroyImageView(dev, slot.view, nullptr); + if (slot.image) vkDestroyImage(dev, slot.image, nullptr); + if (slot.mem) vkFreeMemory(dev, slot.mem, nullptr); + slot = src; + defer_idx = (defer_idx + 1) % DEFER_RING_SIZE; + } + src = {}; + } + + void cleanup_pipeline() { + if (!dev) return; + vkDeviceWaitIdle(dev); + destroy_src_image(); + // Flush deferred destroys + for (auto &slot : defer_ring) { + if (slot.view) vkDestroyImageView(dev, slot.view, nullptr); + if (slot.image) vkDestroyImage(dev, slot.image, nullptr); + if (slot.mem) vkFreeMemory(dev, slot.mem, nullptr); + slot = {}; + } + if (y_view) vkDestroyImageView(dev, y_view, nullptr); + if (uv_view) vkDestroyImageView(dev, uv_view, nullptr); + destroy_cursor_image(); + if (cmd_pool) vkDestroyCommandPool(dev, cmd_pool, nullptr); + if (sampler) vkDestroySampler(dev, sampler, nullptr); + if (desc_pool) vkDestroyDescriptorPool(dev, desc_pool, nullptr); + if (pipeline) vkDestroyPipeline(dev, pipeline, nullptr); + if (pipeline_layout) vkDestroyPipelineLayout(dev, pipeline_layout, nullptr); + if (ds_layout) vkDestroyDescriptorSetLayout(dev, ds_layout, nullptr); + if (shader_module) vkDestroyShaderModule(dev, shader_module, nullptr); + } + + static int init_hw_device(platf::avcodec_encode_device_t *, AVBufferRef **hw_device_buf) { + return create_vulkan_hwdevice(hw_device_buf); + } + + // Dimensions + int width = 0, height = 0; + int offset_x = 0, offset_y = 0; + AVBufferRef *hw_frames_ctx = nullptr; + frame_t hwframe; + std::uint64_t sequence = 0; + + // Vulkan device (from FFmpeg) + VkDevice dev = VK_NULL_HANDLE; + VkPhysicalDevice phys_dev = VK_NULL_HANDLE; + AVVulkanDeviceContext *vk_dev_ctx = nullptr; + int compute_qf = -1; + VkQueue 
compute_queue = VK_NULL_HANDLE; + + // Compute pipeline + VkShaderModule shader_module = VK_NULL_HANDLE; + VkDescriptorSetLayout ds_layout = VK_NULL_HANDLE; + VkPipelineLayout pipeline_layout = VK_NULL_HANDLE; + VkPipeline pipeline = VK_NULL_HANDLE; + VkDescriptorPool desc_pool = VK_NULL_HANDLE; + VkDescriptorSet desc_set = VK_NULL_HANDLE; + VkSampler sampler = VK_NULL_HANDLE; + + // Command submission — ring of buffers to avoid reuse while in-flight. + // No CPU waits: by the time we wrap around, the old submission is long done. + static constexpr int CMD_RING_SIZE = 3; + VkCommandPool cmd_pool = VK_NULL_HANDLE; + VkCommandBuffer cmd_ring[CMD_RING_SIZE] = {}; + int cmd_ring_idx = 0; + + // Source DMA-BUF image with deferred destruction + struct src_image_t { + VkImage image = VK_NULL_HANDLE; + VkDeviceMemory mem = VK_NULL_HANDLE; + VkImageView view = VK_NULL_HANDLE; + }; + src_image_t src = {}; + static constexpr int DEFER_RING_SIZE = 4; + src_image_t defer_ring[DEFER_RING_SIZE] = {}; + int defer_idx = 0; + + // Target NV12 plane views + VkImageView y_view = VK_NULL_HANDLE; + VkImageView uv_view = VK_NULL_HANDLE; + bool target_views_created = false; + bool target_initialized = false; + bool descriptors_dirty = false; + + // Cursor image + struct { + VkImage image = VK_NULL_HANDLE; + VkDeviceMemory mem = VK_NULL_HANDLE; + VkImageView view = VK_NULL_HANDLE; + bool needs_transition = false; + } cursor = {}; + unsigned long cursor_serial = 0; + + // Push constants (color matrix) + PushConstants push = {}; + + PFN_vkGetMemoryFdPropertiesKHR vkGetMemoryFdPropertiesKHR_fn = nullptr; + }; + + // Free functions + + int vulkan_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *, AVBufferRef **hw_device_buf) { + return create_vulkan_hwdevice(hw_device_buf); + } + + bool validate() { + if (!avcodec_find_encoder_by_name("h264_vulkan") && !avcodec_find_encoder_by_name("hevc_vulkan")) + return false; + AVBufferRef *dev = nullptr; + if 
(create_vulkan_hwdevice(&dev) < 0) + return false; + av_buffer_unref(&dev); + return true; + } + + std::unique_ptr make_avcodec_encode_device_vram(int w, int h, int offset_x, int offset_y) { + auto dev = std::make_unique(); + if (dev->init(w, h, offset_x, offset_y) < 0) return nullptr; + return dev; + } + + std::unique_ptr make_avcodec_encode_device_ram(int, int) { + return nullptr; + } + +} // namespace vk diff --git a/src/platform/linux/vulkan_encode.h b/src/platform/linux/vulkan_encode.h new file mode 100644 index 00000000000..db887f504c7 --- /dev/null +++ b/src/platform/linux/vulkan_encode.h @@ -0,0 +1,36 @@ +/** + * @file src/platform/linux/vulkan_encode.h + * @brief Declarations for FFmpeg Vulkan Video encoder. + */ +#pragma once + +#include "src/platform/common.h" + +extern "C" struct AVBufferRef; + +namespace vk { + + /** + * @brief Initialize Vulkan hardware device for FFmpeg encoding. + * @param encode_device The encode device (vk_t). + * @param hw_device_buf Output hardware device buffer. + * @return 0 on success, negative on error. + */ + int vulkan_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *encode_device, AVBufferRef **hw_device_buf); + + /** + * @brief Create a Vulkan encode device for RAM capture. + */ + std::unique_ptr make_avcodec_encode_device_ram(int width, int height); + + /** + * @brief Create a Vulkan encode device for VRAM capture. + */ + std::unique_ptr make_avcodec_encode_device_vram(int width, int height, int offset_x, int offset_y); + + /** + * @brief Check if FFmpeg Vulkan Video encoding is available. 
+ */ + bool validate(); + +} // namespace vk diff --git a/src/video.cpp b/src/video.cpp index 7487e1278e6..2aa6df3e091 100644 --- a/src/video.cpp +++ b/src/video.cpp @@ -122,6 +122,7 @@ namespace video { util::Either vaapi_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *); util::Either cuda_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *); util::Either vt_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *); + util::Either vulkan_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *); class avcodec_software_encode_device_t: public platf::avcodec_encode_device_t { public: @@ -1012,7 +1013,81 @@ namespace video { // RC buffer size will be set in platform code if supported LIMITED_GOP_SIZE | PARALLEL_ENCODING | NO_RC_BUF_LIMIT }; -#endif + +#ifdef SUNSHINE_BUILD_VULKAN + encoder_t vulkan { + "vulkan"sv, + std::make_unique( + AV_HWDEVICE_TYPE_VULKAN, + AV_HWDEVICE_TYPE_NONE, + AV_PIX_FMT_VULKAN, + AV_PIX_FMT_NV12, + AV_PIX_FMT_P010, + AV_PIX_FMT_NONE, + AV_PIX_FMT_NONE, + vulkan_init_avcodec_hardware_input_buffer + ), + { + // AV1 + { + {"idr_interval"s, std::numeric_limits::max()}, + {"tune"s, &config::video.vk.tune}, + {"rc_mode"s, &config::video.vk.rc_mode}, + {"units"s, 0}, + {"quality"s, 1}, + {"usage"s, "stream"s}, + {"content"s, "rendered"s}, + {"async_depth"s, 1}, + }, + {}, // SDR-specific options + {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options + {}, // Fallback options + "av1_vulkan"s, + }, + { + // HEVC + { + {"idr_interval"s, std::numeric_limits::max()}, + {"tune"s, &config::video.vk.tune}, + {"rc_mode"s, &config::video.vk.rc_mode}, + {"units"s, 0}, + {"quality"s, 1}, + {"usage"s, "stream"s}, + {"content"s, "rendered"s}, + {"async_depth"s, 1}, + }, + {}, // SDR-specific options + {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options + {}, // Fallback options + "hevc_vulkan"s, + }, + { + 
// H.264 + { + {"idr_interval"s, std::numeric_limits::max()}, + {"tune"s, &config::video.vk.tune}, + {"rc_mode"s, &config::video.vk.rc_mode}, + {"units"s, 0}, + {"quality"s, 1}, + {"usage"s, "stream"s}, + {"content"s, "rendered"s}, + {"async_depth"s, 1}, + }, + {}, // SDR-specific options + {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options + {}, // Fallback options + "h264_vulkan"s, + }, + LIMITED_GOP_SIZE | PARALLEL_ENCODING + }; +#endif // SUNSHINE_BUILD_VULKAN +#endif // linux #ifdef __APPLE__ encoder_t videotoolbox { @@ -1092,6 +1167,9 @@ namespace video { &mediafoundation, #endif #if defined(__linux__) || defined(linux) || defined(__linux) || defined(__FreeBSD__) +#ifdef SUNSHINE_BUILD_VULKAN + &vulkan, +#endif &vaapi, #endif #ifdef __APPLE__ @@ -2934,6 +3012,43 @@ namespace video { return hw_device_buf; } +#ifdef SUNSHINE_BUILD_VULKAN + typedef int (*vulkan_init_avcodec_hardware_input_buffer_fn)(platf::avcodec_encode_device_t *encode_device, AVBufferRef **hw_device_buf); + + util::Either vulkan_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *encode_device) { + avcodec_buffer_t hw_device_buf; + + if (encode_device && encode_device->data) { + if (((vulkan_init_avcodec_hardware_input_buffer_fn) encode_device->data)(encode_device, &hw_device_buf)) { + return -1; + } + return hw_device_buf; + } + + // Try render device path first (like VAAPI does), then fallback to device indices + auto render_device = config::video.adapter_name.empty() ? 
"/dev/dri/renderD128" : config::video.adapter_name.c_str(); + + auto status = av_hwdevice_ctx_create(&hw_device_buf, AV_HWDEVICE_TYPE_VULKAN, render_device, nullptr, 0); + if (status >= 0) { + BOOST_LOG(info) << "Using Vulkan device: "sv << render_device; + return hw_device_buf; + } + + // Fallback: try device indices for multi-GPU systems + const char *devices[] = {"1", "0", "2", "3", nullptr}; + for (int i = 0; devices[i]; i++) { + status = av_hwdevice_ctx_create(&hw_device_buf, AV_HWDEVICE_TYPE_VULKAN, devices[i], nullptr, 0); + if (status >= 0) { + BOOST_LOG(info) << "Using Vulkan device index: "sv << devices[i]; + return hw_device_buf; + } + } + + BOOST_LOG(error) << "Failed to create a Vulkan device"sv; + return -1; + } +#endif + util::Either cuda_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *encode_device) { avcodec_buffer_t hw_device_buf; @@ -3031,6 +3146,10 @@ namespace video { return platf::mem_type_e::dxgi; case AV_HWDEVICE_TYPE_VAAPI: return platf::mem_type_e::vaapi; +#ifdef SUNSHINE_BUILD_VULKAN + case AV_HWDEVICE_TYPE_VULKAN: + return platf::mem_type_e::vulkan; +#endif case AV_HWDEVICE_TYPE_CUDA: return platf::mem_type_e::cuda; case AV_HWDEVICE_TYPE_NONE: diff --git a/src_assets/common/assets/web/config.html b/src_assets/common/assets/web/config.html index 222fba0eccd..834701f7e22 100644 --- a/src_assets/common/assets/web/config.html +++ b/src_assets/common/assets/web/config.html @@ -323,6 +323,14 @@

{{ $t('config.configuration') }}

"vt_realtime": "enabled", }, }, + { + id: "vulkan", + name: "Vulkan Encoder", + options: { + "vk_tune": 2, + "vk_rc_mode": 4, + }, + }, { id: "vaapi", name: "VA-API Encoder", @@ -381,7 +389,7 @@

{{ $t('config.configuration') }}

var app = document.getElementById("app"); if (this.platform === "windows") { this.tabs = this.tabs.filter((el) => { - return el.id !== "vt" && el.id !== "vaapi"; + return el.id !== "vt" && el.id !== "vaapi" && el.id !== "vulkan"; }); } if (this.platform === "freebsd" || this.platform === "linux") { @@ -391,7 +399,7 @@

{{ $t('config.configuration') }}

} if (this.platform === "macos") { this.tabs = this.tabs.filter((el) => { - return el.id !== "amd" && el.id !== "nv" && el.id !== "qsv" && el.id !== "vaapi"; + return el.id !== "amd" && el.id !== "nv" && el.id !== "qsv" && el.id !== "vaapi" && el.id !== "vulkan"; }); } @@ -435,6 +443,7 @@

{{ $t('config.configuration') }}

'qsv': 'Gpu', 'vaapi': 'Gpu', 'vt': 'Gpu', + 'vulkan': 'Gpu', 'sw': 'Cpu', }; return iconMap[tabId] || 'Settings'; diff --git a/src_assets/common/assets/web/configs/tabs/Advanced.vue b/src_assets/common/assets/web/configs/tabs/Advanced.vue index d63d095f2d9..d0b13adf3f7 100644 --- a/src_assets/common/assets/web/configs/tabs/Advanced.vue +++ b/src_assets/common/assets/web/configs/tabs/Advanced.vue @@ -97,11 +97,13 @@ const config = ref(props.config)