Convert Metal shaders to FP16
This commit is contained in:
parent
8ee82421b3
commit
93dc6d6b60
2 changed files with 44 additions and 55 deletions
|
|
@ -1,3 +1,4 @@
|
||||||
|
#include <metal_stdlib>
|
||||||
using namespace metal;
|
using namespace metal;
|
||||||
|
|
||||||
struct Vertex
|
struct Vertex
|
||||||
|
|
@ -8,10 +9,10 @@ struct Vertex
|
||||||
|
|
||||||
struct CscParams
|
struct CscParams
|
||||||
{
|
{
|
||||||
float3 matrix[3];
|
half3x3 matrix;
|
||||||
float3 offsets;
|
half3 offsets;
|
||||||
float2 chromaOffset;
|
half2 chromaOffset;
|
||||||
float bitnessScaleFactor;
|
half bitnessScaleFactor;
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr sampler s(coord::normalized, address::clamp_to_edge, filter::linear);
|
constexpr sampler s(coord::normalized, address::clamp_to_edge, filter::linear);
|
||||||
|
|
@ -21,48 +22,40 @@ vertex Vertex vs_draw(constant Vertex *vertices [[ buffer(0) ]], uint id [[ vert
|
||||||
return vertices[id];
|
return vertices[id];
|
||||||
}
|
}
|
||||||
|
|
||||||
fragment float4 ps_draw_biplanar(Vertex v [[ stage_in ]],
|
fragment half4 ps_draw_biplanar(Vertex v [[ stage_in ]],
|
||||||
|
constant CscParams &cscParams [[ buffer(0) ]],
|
||||||
|
texture2d<half> luminancePlane [[ texture(0) ]],
|
||||||
|
texture2d<half> chrominancePlane [[ texture(1) ]])
|
||||||
|
{
|
||||||
|
float2 chromaOffset = float2(cscParams.chromaOffset) / float2(luminancePlane.get_width(),
|
||||||
|
luminancePlane.get_height());
|
||||||
|
half3 yuv = half3(luminancePlane.sample(s, v.texCoords).r,
|
||||||
|
chrominancePlane.sample(s, v.texCoords + chromaOffset).rg);
|
||||||
|
yuv *= cscParams.bitnessScaleFactor;
|
||||||
|
yuv -= cscParams.offsets;
|
||||||
|
|
||||||
|
return half4(yuv * cscParams.matrix, 1.0h);
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment half4 ps_draw_triplanar(Vertex v [[ stage_in ]],
|
||||||
constant CscParams &cscParams [[ buffer(0) ]],
|
constant CscParams &cscParams [[ buffer(0) ]],
|
||||||
texture2d<float> luminancePlane [[ texture(0) ]],
|
texture2d<half> luminancePlane [[ texture(0) ]],
|
||||||
texture2d<float> chrominancePlane [[ texture(1) ]])
|
texture2d<half> chrominancePlaneU [[ texture(1) ]],
|
||||||
|
texture2d<half> chrominancePlaneV [[ texture(2) ]])
|
||||||
{
|
{
|
||||||
float2 chromaOffset = float2(cscParams.chromaOffset.x / luminancePlane.get_width(),
|
float2 chromaOffset = float2(cscParams.chromaOffset) / float2(luminancePlane.get_width(),
|
||||||
cscParams.chromaOffset.y / luminancePlane.get_height());
|
luminancePlane.get_height());
|
||||||
float3 yuv = float3(luminancePlane.sample(s, v.texCoords).r,
|
half3 yuv = half3(luminancePlane.sample(s, v.texCoords).r,
|
||||||
chrominancePlane.sample(s, v.texCoords + chromaOffset).rg);
|
chrominancePlaneU.sample(s, v.texCoords + chromaOffset).r,
|
||||||
|
chrominancePlaneV.sample(s, v.texCoords + chromaOffset).r);
|
||||||
yuv *= cscParams.bitnessScaleFactor;
|
yuv *= cscParams.bitnessScaleFactor;
|
||||||
yuv -= cscParams.offsets;
|
yuv -= cscParams.offsets;
|
||||||
|
|
||||||
float3 rgb;
|
return half4(yuv * cscParams.matrix, 1.0h);
|
||||||
rgb.r = dot(yuv, cscParams.matrix[0]);
|
|
||||||
rgb.g = dot(yuv, cscParams.matrix[1]);
|
|
||||||
rgb.b = dot(yuv, cscParams.matrix[2]);
|
|
||||||
return float4(rgb, 1.0f);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fragment float4 ps_draw_triplanar(Vertex v [[ stage_in ]],
|
fragment half4 ps_draw_rgb(Vertex v [[ stage_in ]],
|
||||||
constant CscParams &cscParams [[ buffer(0) ]],
|
texture2d<half> rgbTexture [[ texture(0) ]])
|
||||||
texture2d<float> luminancePlane [[ texture(0) ]],
|
|
||||||
texture2d<float> chrominancePlaneU [[ texture(1) ]],
|
|
||||||
texture2d<float> chrominancePlaneV [[ texture(2) ]])
|
|
||||||
{
|
|
||||||
float2 chromaOffset = float2(cscParams.chromaOffset.x / luminancePlane.get_width(),
|
|
||||||
cscParams.chromaOffset.y / luminancePlane.get_height());
|
|
||||||
float3 yuv = float3(luminancePlane.sample(s, v.texCoords).r,
|
|
||||||
chrominancePlaneU.sample(s, v.texCoords + chromaOffset).r,
|
|
||||||
chrominancePlaneV.sample(s, v.texCoords + chromaOffset).r);
|
|
||||||
yuv *= cscParams.bitnessScaleFactor;
|
|
||||||
yuv -= cscParams.offsets;
|
|
||||||
|
|
||||||
float3 rgb;
|
|
||||||
rgb.r = dot(yuv, cscParams.matrix[0]);
|
|
||||||
rgb.g = dot(yuv, cscParams.matrix[1]);
|
|
||||||
rgb.b = dot(yuv, cscParams.matrix[2]);
|
|
||||||
return float4(rgb, 1.0f);
|
|
||||||
}
|
|
||||||
|
|
||||||
fragment float4 ps_draw_rgb(Vertex v [[ stage_in ]],
|
|
||||||
texture2d<float> rgbTexture [[ texture(0) ]])
|
|
||||||
{
|
{
|
||||||
return rgbTexture.sample(s, v.texCoords);
|
return rgbTexture.sample(s, v.texCoords);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -24,15 +24,15 @@ extern "C" {
|
||||||
|
|
||||||
struct CscParams
|
struct CscParams
|
||||||
{
|
{
|
||||||
simd_float3 matrix[3];
|
simd_half3x3 matrix;
|
||||||
simd_float3 offsets;
|
simd_half3 offsets;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ParamBuffer
|
struct ParamBuffer
|
||||||
{
|
{
|
||||||
CscParams cscParams;
|
CscParams cscParams;
|
||||||
simd_float2 chromaOffset;
|
simd_half2 chromaOffset;
|
||||||
float bitnessScaleFactor;
|
simd_half1 bitnessScaleFactor;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Vertex
|
struct Vertex
|
||||||
|
|
@ -262,18 +262,14 @@ public:
|
||||||
getFramePremultipliedCscConstants(frame, cscMatrix, yuvOffsets);
|
getFramePremultipliedCscConstants(frame, cscMatrix, yuvOffsets);
|
||||||
getFrameChromaCositingOffsets(frame, chromaOffset);
|
getFrameChromaCositingOffsets(frame, chromaOffset);
|
||||||
|
|
||||||
// Copy the row-major CSC matrix into column-major for Metal
|
paramBuffer.cscParams.matrix = simd_matrix(simd_make_half3(cscMatrix[0], cscMatrix[3], cscMatrix[6]),
|
||||||
for (int i = 0; i < 3; i++) {
|
simd_make_half3(cscMatrix[1], cscMatrix[4], cscMatrix[7]),
|
||||||
paramBuffer.cscParams.matrix[i] = simd_make_float3(cscMatrix[0 + i],
|
simd_make_half3(cscMatrix[2], cscMatrix[5], cscMatrix[8]));
|
||||||
cscMatrix[3 + i],
|
paramBuffer.cscParams.offsets = simd_make_half3(yuvOffsets[0],
|
||||||
cscMatrix[6 + i]);
|
yuvOffsets[1],
|
||||||
}
|
yuvOffsets[2]);
|
||||||
|
paramBuffer.chromaOffset = simd_make_half2(chromaOffset[0],
|
||||||
paramBuffer.cscParams.offsets = simd_make_float3(yuvOffsets[0],
|
chromaOffset[1]);
|
||||||
yuvOffsets[1],
|
|
||||||
yuvOffsets[2]);
|
|
||||||
paramBuffer.chromaOffset = simd_make_float2(chromaOffset[0],
|
|
||||||
chromaOffset[1]);
|
|
||||||
|
|
||||||
// Set the EDR metadata for HDR10 to enable OS tonemapping
|
// Set the EDR metadata for HDR10 to enable OS tonemapping
|
||||||
if (frame->color_trc == AVCOL_TRC_SMPTE2084 && m_MasteringDisplayColorVolume != nullptr) {
|
if (frame->color_trc == AVCOL_TRC_SMPTE2084 && m_MasteringDisplayColorVolume != nullptr) {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue