From 93dc6d6b60a1099670f5e0dba125cd36ebaa62a9 Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Mon, 29 Dec 2025 01:45:49 -0600 Subject: [PATCH] Convert Metal shaders to FP16 --- app/shaders/vt_renderer.metal | 71 +++++++++---------- .../video/ffmpeg-renderers/vt_metal.mm | 28 ++++---- 2 files changed, 44 insertions(+), 55 deletions(-) diff --git a/app/shaders/vt_renderer.metal b/app/shaders/vt_renderer.metal index dd5ee58d..c23e89d6 100644 --- a/app/shaders/vt_renderer.metal +++ b/app/shaders/vt_renderer.metal @@ -1,3 +1,4 @@ +#include using namespace metal; struct Vertex @@ -8,10 +9,10 @@ struct Vertex struct CscParams { - float3 matrix[3]; - float3 offsets; - float2 chromaOffset; - float bitnessScaleFactor; + half3x3 matrix; + half3 offsets; + half2 chromaOffset; + half bitnessScaleFactor; }; constexpr sampler s(coord::normalized, address::clamp_to_edge, filter::linear); @@ -21,48 +22,40 @@ vertex Vertex vs_draw(constant Vertex *vertices [[ buffer(0) ]], uint id [[ vert return vertices[id]; } -fragment float4 ps_draw_biplanar(Vertex v [[ stage_in ]], +fragment half4 ps_draw_biplanar(Vertex v [[ stage_in ]], + constant CscParams &cscParams [[ buffer(0) ]], + texture2d luminancePlane [[ texture(0) ]], + texture2d chrominancePlane [[ texture(1) ]]) +{ + float2 chromaOffset = float2(cscParams.chromaOffset) / float2(luminancePlane.get_width(), + luminancePlane.get_height()); + half3 yuv = half3(luminancePlane.sample(s, v.texCoords).r, + chrominancePlane.sample(s, v.texCoords + chromaOffset).rg); + yuv *= cscParams.bitnessScaleFactor; + yuv -= cscParams.offsets; + + return half4(yuv * cscParams.matrix, 1.0h); +} + +fragment half4 ps_draw_triplanar(Vertex v [[ stage_in ]], constant CscParams &cscParams [[ buffer(0) ]], - texture2d luminancePlane [[ texture(0) ]], - texture2d chrominancePlane [[ texture(1) ]]) + texture2d luminancePlane [[ texture(0) ]], + texture2d chrominancePlaneU [[ texture(1) ]], + texture2d chrominancePlaneV [[ texture(2) ]]) { - float2 chromaOffset = float2(cscParams.chromaOffset.x / luminancePlane.get_width(), - cscParams.chromaOffset.y / luminancePlane.get_height()); - float3 yuv = float3(luminancePlane.sample(s, v.texCoords).r, - chrominancePlane.sample(s, v.texCoords + chromaOffset).rg); + float2 chromaOffset = float2(cscParams.chromaOffset) / float2(luminancePlane.get_width(), + luminancePlane.get_height()); + half3 yuv = half3(luminancePlane.sample(s, v.texCoords).r, + chrominancePlaneU.sample(s, v.texCoords + chromaOffset).r, + chrominancePlaneV.sample(s, v.texCoords + chromaOffset).r); yuv *= cscParams.bitnessScaleFactor; yuv -= cscParams.offsets; - float3 rgb; - rgb.r = dot(yuv, cscParams.matrix[0]); - rgb.g = dot(yuv, cscParams.matrix[1]); - rgb.b = dot(yuv, cscParams.matrix[2]); - return float4(rgb, 1.0f); + return half4(yuv * cscParams.matrix, 1.0h); } -fragment float4 ps_draw_triplanar(Vertex v [[ stage_in ]], - constant CscParams &cscParams [[ buffer(0) ]], - texture2d luminancePlane [[ texture(0) ]], - texture2d chrominancePlaneU [[ texture(1) ]], - texture2d chrominancePlaneV [[ texture(2) ]]) -{ - float2 chromaOffset = float2(cscParams.chromaOffset.x / luminancePlane.get_width(), - cscParams.chromaOffset.y / luminancePlane.get_height()); - float3 yuv = float3(luminancePlane.sample(s, v.texCoords).r, - chrominancePlaneU.sample(s, v.texCoords + chromaOffset).r, - chrominancePlaneV.sample(s, v.texCoords + chromaOffset).r); - yuv *= cscParams.bitnessScaleFactor; - yuv -= cscParams.offsets; - - float3 rgb; - rgb.r = dot(yuv, cscParams.matrix[0]); - rgb.g = dot(yuv, cscParams.matrix[1]); - rgb.b = dot(yuv, cscParams.matrix[2]); - return float4(rgb, 1.0f); -} - -fragment float4 ps_draw_rgb(Vertex v [[ stage_in ]], - texture2d rgbTexture [[ texture(0) ]]) +fragment half4 ps_draw_rgb(Vertex v [[ stage_in ]], + texture2d rgbTexture [[ texture(0) ]]) { return rgbTexture.sample(s, v.texCoords); } diff --git a/app/streaming/video/ffmpeg-renderers/vt_metal.mm b/app/streaming/video/ffmpeg-renderers/vt_metal.mm index 957127c8..7182b239 100644 --- a/app/streaming/video/ffmpeg-renderers/vt_metal.mm +++ b/app/streaming/video/ffmpeg-renderers/vt_metal.mm @@ -24,15 +24,15 @@ extern "C" { struct CscParams { - simd_float3 matrix[3]; - simd_float3 offsets; + simd_half3x3 matrix; + simd_half3 offsets; }; struct ParamBuffer { CscParams cscParams; - simd_float2 chromaOffset; - float bitnessScaleFactor; + simd_half2 chromaOffset; + simd_half1 bitnessScaleFactor; }; struct Vertex @@ -262,18 +262,14 @@ public: getFramePremultipliedCscConstants(frame, cscMatrix, yuvOffsets); getFrameChromaCositingOffsets(frame, chromaOffset); - // Copy the row-major CSC matrix into column-major for Metal - for (int i = 0; i < 3; i++) { - paramBuffer.cscParams.matrix[i] = simd_make_float3(cscMatrix[0 + i], - cscMatrix[3 + i], - cscMatrix[6 + i]); - } - - paramBuffer.cscParams.offsets = simd_make_float3(yuvOffsets[0], - yuvOffsets[1], - yuvOffsets[2]); - paramBuffer.chromaOffset = simd_make_float2(chromaOffset[0], - chromaOffset[1]); + paramBuffer.cscParams.matrix = simd_matrix(simd_make_half3(cscMatrix[0], cscMatrix[3], cscMatrix[6]), + simd_make_half3(cscMatrix[1], cscMatrix[4], cscMatrix[7]), + simd_make_half3(cscMatrix[2], cscMatrix[5], cscMatrix[8])); + paramBuffer.cscParams.offsets = simd_make_half3(yuvOffsets[0], + yuvOffsets[1], + yuvOffsets[2]); + paramBuffer.chromaOffset = simd_make_half2(chromaOffset[0], + chromaOffset[1]); // Set the EDR metadata for HDR10 to enable OS tonemapping if (frame->color_trc == AVCOL_TRC_SMPTE2084 && m_MasteringDisplayColorVolume != nullptr) {