From e36bde4acce48c21c79a57fb29727d96fdae6503 Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Sun, 2 Jul 2023 22:16:20 -0500 Subject: [PATCH] Implement AV1 codec support This has a breaking change to StreamConfiguration that requires client updates. --- src/Connection.c | 2 +- src/Limelight.h | 59 ++++++++++++--------- src/Misc.c | 3 +- src/RtspConnection.c | 28 +++++++--- src/SdpGenerator.c | 15 +++++- src/VideoDepacketizer.c | 114 ++++++++++++++++++++++++---------------- 6 files changed, 143 insertions(+), 78 deletions(-) diff --git a/src/Connection.c b/src/Connection.c index c7b9385..40b00c7 100644 --- a/src/Connection.c +++ b/src/Connection.c @@ -288,7 +288,7 @@ int LiStartConnection(PSERVER_INFORMATION serverInfo, PSTREAM_CONFIGURATION stre } // Dimensions over 4096 are only supported with HEVC on NVENC - if (!StreamConfig.supportsHevc && + if (!(StreamConfig.supportedVideoFormats & ~VIDEO_FORMAT_MASK_H264) && (StreamConfig.width > 4096 || StreamConfig.height > 4096)) { Limelog("WARNING: Streaming at resolutions above 4K using H.264 will likely fail! Trying anyway!\n"); } diff --git a/src/Limelight.h b/src/Limelight.h index eae150c..8b890b1 100644 --- a/src/Limelight.h +++ b/src/Limelight.h @@ -61,14 +61,14 @@ typedef struct _STREAM_CONFIGURATION { // See AUDIO_CONFIGURATION constants and MAKE_AUDIO_CONFIGURATION() below. int audioConfiguration; - // Specifies that the client can accept an H.265 video stream - // if the server is able to provide one. - bool supportsHevc; + // Specifies the mask of supported video formats. + // See VIDEO_FORMAT constants below. + int supportedVideoFormats; - // Specifies that the client is requesting an HDR H.265 video stream. + // Specifies that the client is requesting an HDR video stream. 
// // This should only be set if: - // 1) The client decoder supports HEVC Main10 profile (supportsHevc must be set too) + // 1) The client decoder supports a 10-bit format (as set in supportedVideoFormats) // 2) The server has support for HDR as indicated by ServerCodecModeSupport in /serverinfo // // See ConnListenerSetHdrMode() for a callback to indicate when to set @@ -81,6 +81,12 @@ typedef struct _STREAM_CONFIGURATION { // (or in addition to) improving image quality. int hevcBitratePercentageMultiplier; + // Specifies the percentage that the specified bitrate will be adjusted + // when an AV1 stream will be delivered. This allows clients to opt to + // reduce bandwidth when AV1 is chosen as the video codec rather than + // (or in addition to) improving image quality. + int av1BitratePercentageMultiplier; + // If specified, the client's display refresh rate x 100. For example, // 59.94 Hz would be specified as 5994. This is used by recent versions // of GFE for enhanced frame pacing. @@ -113,7 +119,8 @@ typedef struct _STREAM_CONFIGURATION { void LiInitializeStreamConfiguration(PSTREAM_CONFIGURATION streamConfig); // These identify codec configuration data in the buffer lists -// of frames identified as IDR frames. +// of frames identified as IDR frames for H.264 and HEVC formats. +// For other codecs, all data is marked as BUFFER_TYPE_PICDATA. #define BUFFER_TYPE_PICDATA 0x00 #define BUFFER_TYPE_SPS 0x01 #define BUFFER_TYPE_PPS 0x02 @@ -129,7 +136,7 @@ typedef struct _LENTRY { // Size of data in bytes (never <= 0) int length; - // Buffer type (listed above) + // Buffer type (listed above, only set for H.264 and HEVC formats) int bufferType; } LENTRY, *PLENTRY; @@ -137,10 +144,13 @@ typedef struct _LENTRY { // previous P-frames. #define FRAME_TYPE_PFRAME 0x00 -// Indicates this frame contains SPS, PPS, and VPS (if applicable) -// as the first buffers in the list. Each NALU will appear as a separate -// buffer in the buffer list. 
The I-frame data follows immediately +// This is a key frame. +// +// For H.264 and HEVC, this means the frame contains SPS, PPS, and VPS (HEVC only) NALUs +// as the first buffers in the list. The I-frame data follows immediately // after the codec configuration NALUs. +// +// For other codecs, any configuration data is not split into separate buffers. #define FRAME_TYPE_IDR 0x01 // A decode unit describes a buffer chain of video data from multiple packets @@ -219,22 +229,19 @@ typedef struct _DECODE_UNIT { // The maximum number of channels supported #define AUDIO_CONFIGURATION_MAX_CHANNEL_COUNT 8 -// Passed to DecoderRendererSetup to indicate that the following video stream will be -// in H.264 High Profile. -#define VIDEO_FORMAT_H264 0x0001 - -// Passed to DecoderRendererSetup to indicate that the following video stream will be -// in H.265 Main profile. This will only be passed if supportsHevc is true. -#define VIDEO_FORMAT_H265 0x0100 - -// Passed to DecoderRendererSetup to indicate that the following video stream will be -// in H.265 Main10 (HDR10) profile. This will only be passed if enableHdr is true. -#define VIDEO_FORMAT_H265_MAIN10 0x0200 +// Passed in StreamConfiguration.supportedVideoFormats to specify supported codecs +// and to DecoderRendererSetup() to specify selected codec. +#define VIDEO_FORMAT_H264 0x0001 // H.264 High Profile +#define VIDEO_FORMAT_H265 0x0100 // HEVC Main Profile +#define VIDEO_FORMAT_H265_MAIN10 0x0200 // HEVC Main10 Profile (requires enableHdr) +#define VIDEO_FORMAT_AV1_MAIN8 0x1000 // AV1 Main 8-bit profile +#define VIDEO_FORMAT_AV1_MAIN10 0x2000 // AV1 Main 10-bit profile (requires enableHdr) // Masks for clients to use to match video codecs without profile-specific details. 
-#define VIDEO_FORMAT_MASK_H264 0x00FF -#define VIDEO_FORMAT_MASK_H265 0xFF00 -#define VIDEO_FORMAT_MASK_10BIT 0x0200 +#define VIDEO_FORMAT_MASK_H264 0x000F +#define VIDEO_FORMAT_MASK_H265 0x0F00 +#define VIDEO_FORMAT_MASK_AV1 0xF000 +#define VIDEO_FORMAT_MASK_10BIT 0x2200 // If set in the renderer capabilities field, this flag will cause audio/video data to // be submitted directly from the receive thread. This should only be specified if the @@ -268,6 +275,10 @@ typedef struct _DECODE_UNIT { // also providing a sample callback is not allowed. #define CAPABILITY_PULL_RENDERER 0x20 +// If set in the video renderer capabilities field, this flag specifies that the renderer +// supports reference frame invalidation for AV1 streams. This flag is only valid on video renderers. +#define CAPABILITY_REFERENCE_FRAME_INVALIDATION_AV1 0x40 + // If set in the video renderer capabilities field, this macro specifies that the renderer // supports slicing to increase decoding performance. The parameter specifies the desired // number of slices per frame. This capability is only valid on video renderers. 
diff --git a/src/Misc.c b/src/Misc.c index 43c0f55..8c3c553 100644 --- a/src/Misc.c +++ b/src/Misc.c @@ -123,7 +123,8 @@ bool isReferenceFrameInvalidationSupportedByDecoder(void) { LC_ASSERT(NegotiatedVideoFormat != 0); return ((NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H264) && (VideoCallbacks.capabilities & CAPABILITY_REFERENCE_FRAME_INVALIDATION_AVC)) || - ((NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H265) && (VideoCallbacks.capabilities & CAPABILITY_REFERENCE_FRAME_INVALIDATION_HEVC)); + ((NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H265) && (VideoCallbacks.capabilities & CAPABILITY_REFERENCE_FRAME_INVALIDATION_HEVC)) || + ((NegotiatedVideoFormat & VIDEO_FORMAT_MASK_AV1) && (VideoCallbacks.capabilities & CAPABILITY_REFERENCE_FRAME_INVALIDATION_AV1)); } bool isReferenceFrameInvalidationEnabled(void) { diff --git a/src/RtspConnection.c b/src/RtspConnection.c index 08e5483..b4f6baa 100644 --- a/src/RtspConnection.c +++ b/src/RtspConnection.c @@ -904,13 +904,27 @@ int performRtspHandshake(PSERVER_INFORMATION serverInfo) { goto Exit; } - // The RTSP DESCRIBE reply will contain a collection of SDP media attributes that - // describe the various supported video stream formats and include the SPS, PPS, - // and VPS (if applicable). We will use this information to determine whether the - // server can support HEVC. For some reason, they still set the MIME type of the HEVC - // format to H264, so we can't just look for the HEVC MIME type. What we'll do instead is - // look for the base 64 encoded VPS NALU prefix that is unique to the HEVC bitstream. 
- if (StreamConfig.supportsHevc && strstr(response.payload, "sprop-parameter-sets=AAAAAU")) { + if ((StreamConfig.supportedVideoFormats & VIDEO_FORMAT_MASK_AV1) && strstr(response.payload, "a=rtpmap:200 AV1/90000")) { + if (StreamConfig.enableHdr) { + NegotiatedVideoFormat = VIDEO_FORMAT_AV1_MAIN10; + } + else { + NegotiatedVideoFormat = VIDEO_FORMAT_AV1_MAIN8; + + // Apply bitrate adjustment for SDR AV1 if the client requested one + if (StreamConfig.av1BitratePercentageMultiplier != 0) { + StreamConfig.bitrate *= StreamConfig.av1BitratePercentageMultiplier; + StreamConfig.bitrate /= 100; + } + } + } + else if ((StreamConfig.supportedVideoFormats & VIDEO_FORMAT_MASK_H265) && strstr(response.payload, "sprop-parameter-sets=AAAAAU")) { + // The RTSP DESCRIBE reply will contain a collection of SDP media attributes that + // describe the various supported video stream formats and include the SPS, PPS, + // and VPS (if applicable). We will use this information to determine whether the + // server can support HEVC. For some reason, they still set the MIME type of the HEVC + // format to H264, so we can't just look for the HEVC MIME type. What we'll do instead is + // look for the base 64 encoded VPS NALU prefix that is unique to the HEVC bitstream. 
if (StreamConfig.enableHdr) { NegotiatedVideoFormat = VIDEO_FORMAT_H265_MAIN10; } diff --git a/src/SdpGenerator.c b/src/SdpGenerator.c index 5821581..ee32c75 100644 --- a/src/SdpGenerator.c +++ b/src/SdpGenerator.c @@ -347,7 +347,20 @@ static PSDP_OPTION getAttributesList(char*urlSafeAddr) { sprintf(payloadStr, "%d", slicesPerFrame); err |= addAttributeString(&optionHead, "x-nv-video[0].videoEncoderSlicesPerFrame", payloadStr); - if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H265) { + if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_AV1) { + err |= addAttributeString(&optionHead, "x-nv-vqos[0].bitStreamFormat", "2"); + + if (AppVersionQuad[0] >= 7) { + // Enable HDR if requested + if (StreamConfig.enableHdr) { + err |= addAttributeString(&optionHead, "x-nv-video[0].dynamicRangeMode", "1"); + } + else { + err |= addAttributeString(&optionHead, "x-nv-video[0].dynamicRangeMode", "0"); + } + } + } + else if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_H265) { err |= addAttributeString(&optionHead, "x-nv-clientSupportHevc", "1"); err |= addAttributeString(&optionHead, "x-nv-vqos[0].bitStreamFormat", "1"); diff --git a/src/VideoDepacketizer.c b/src/VideoDepacketizer.c index 738019e..0fd6c23 100644 --- a/src/VideoDepacketizer.c +++ b/src/VideoDepacketizer.c @@ -14,6 +14,7 @@ static bool waitingForIdrFrame; static bool waitingForRefInvalFrame; static unsigned int lastPacketInStream; static bool decodingFrame; +static int frameType; static bool strictIdrFrameWait; static uint64_t syntheticPtsBase; static uint16_t frameHostProcessingLatency; @@ -148,6 +149,9 @@ void destroyVideoDepacketizer(void) { } static bool getAnnexBStartSequence(PBUFFER_DESC current, PBUFFER_DESC startSeq) { + // We must not get called for other codecs + LC_ASSERT(NegotiatedVideoFormat & (VIDEO_FORMAT_MASK_H264 | VIDEO_FORMAT_MASK_H265)); + if (current->length < 3) { return false; } @@ -207,6 +211,10 @@ void validateDecodeUnitForPlayback(PDECODE_UNIT decodeUnit) { // We get 2 sets of VPS, SPS, and PPS NALUs 
in HDR mode. // FIXME: Should we normalize this or something for clients? } + else if (NegotiatedVideoFormat & VIDEO_FORMAT_MASK_AV1) { + // We don't parse the AV1 bitstream + LC_ASSERT(decodeUnit->bufferList->bufferType == BUFFER_TYPE_PICDATA); + } else { LC_ASSERT(false); } @@ -450,6 +458,7 @@ static void reassembleFrame(int frameNumber) { if (qdu != NULL) { qdu->decodeUnit.bufferList = nalChainHead; qdu->decodeUnit.fullLength = nalChainDataLength; + qdu->decodeUnit.frameType = frameType; qdu->decodeUnit.frameNumber = frameNumber; qdu->decodeUnit.frameHostProcessingLatency = frameHostProcessingLatency; qdu->decodeUnit.receiveTimeMs = firstPacketReceiveTime; @@ -463,14 +472,10 @@ static void reassembleFrame(int frameNumber) { qdu->decodeUnit.hdrActive = LiGetCurrentHostDisplayHdrMode(); qdu->decodeUnit.colorspace = (uint8_t)(qdu->decodeUnit.hdrActive ? COLORSPACE_REC_2020 : StreamConfig.colorSpace); - // IDR frames will have leading CSD buffers - if (nalChainHead->bufferType != BUFFER_TYPE_PICDATA) { - qdu->decodeUnit.frameType = FRAME_TYPE_IDR; + // Invoke the key frame callback if needed + if (qdu->decodeUnit.frameType == FRAME_TYPE_IDR) { notifyKeyFrameReceived(); } - else { - qdu->decodeUnit.frameType = FRAME_TYPE_PFRAME; - } nalChainHead = nalChainTail = NULL; nalChainDataLength = 0; @@ -520,6 +525,11 @@ static int getBufferFlags(char* data, int length) { BUFFER_DESC buffer; BUFFER_DESC candidate; + // We only parse H.264 and HEVC bitstreams + if (!(NegotiatedVideoFormat & (VIDEO_FORMAT_MASK_H264 | VIDEO_FORMAT_MASK_H265))) { + return BUFFER_TYPE_PICDATA; + } + buffer.data = data; buffer.length = (unsigned int)length; buffer.offset = 0; @@ -612,7 +622,7 @@ static void queueFragment(PLENTRY_INTERNAL* existingEntry, char* data, int offse } // Process an RTP Payload using the slow path that handles multiple NALUs per packet -static void processRtpPayloadSlow(PBUFFER_DESC currentPos, PLENTRY_INTERNAL* existingEntry) { +static void 
processAvcHevcRtpPayloadSlow(PBUFFER_DESC currentPos, PLENTRY_INTERNAL* existingEntry) { // We should not have any NALUs when processing the first packet in an IDR frame LC_ASSERT(nalChainHead == NULL); LC_ASSERT(nalChainTail == NULL); @@ -637,9 +647,6 @@ static void processRtpPayloadSlow(PBUFFER_DESC currentPos, PLENTRY_INTERNAL* exi start++; #endif - // Now we're decoding a frame - decodingFrame = true; - if (isSeqReferenceFrameStart(currentPos)) { // No longer waiting for an IDR frame waitingForIdrFrame = false; @@ -651,6 +658,9 @@ static void processRtpPayloadSlow(PBUFFER_DESC currentPos, PLENTRY_INTERNAL* exi // Use the cached LENTRY for this NALU since it will be // the bulk of the data in this packet. containsPicData = true; + + // This is an IDR frame + frameType = FRAME_TYPE_IDR; } // Move to the next NALU @@ -784,6 +794,7 @@ static void processRtpPayload(PNV_VIDEO_PACKET videoPacket, int length, // We're now decoding a frame decodingFrame = true; + frameType = FRAME_TYPE_PFRAME; firstPacketReceiveTime = receiveTimeMs; // Some versions of Sunshine don't send a valid PTS, so we will @@ -810,6 +821,13 @@ static void processRtpPayload(PNV_VIDEO_PACKET videoPacket, int length, case 1: // Normal P-frame break; case 2: // IDR frame + // For other codecs, we trust the frame header rather than parsing the bitstream + // to determine if a given frame is an IDR frame. + if (!(NegotiatedVideoFormat & (VIDEO_FORMAT_MASK_H264 | VIDEO_FORMAT_MASK_H265))) { + waitingForIdrFrame = false; + frameType = FRAME_TYPE_IDR; + } + // Fall-through case 4: // Intra-refresh case 5: // P-frame with reference frames invalidated if (waitingForRefInvalFrame) { @@ -905,49 +923,57 @@ static void processRtpPayload(PNV_VIDEO_PACKET videoPacket, int length, // Other versions don't have a frame header at all } - // The Annex B NALU start prefix must be next - if (!getAnnexBStartSequence(&currentPos, NULL)) { - // If we aren't starting on a start prefix, something went wrong. 
- LC_ASSERT(false); + // We only parse H.264 and HEVC at the NALU level + if (NegotiatedVideoFormat & (VIDEO_FORMAT_MASK_H264 | VIDEO_FORMAT_MASK_H265)) { + // The Annex B NALU start prefix must be next + if (!getAnnexBStartSequence(&currentPos, NULL)) { + // If we aren't starting on a start prefix, something went wrong. + LC_ASSERT(false); - // For release builds, we will try to recover by searching for one. - // This mimics the way most decoders handle this situation. - skipToNextNal(&currentPos); - } + // For release builds, we will try to recover by searching for one. + // This mimics the way most decoders handle this situation. + skipToNextNal(&currentPos); + } - // If an AUD NAL is prepended to this frame data, remove it. - // Other parts of this code are not prepared to deal with a - // NAL of that type, so stripping it is the easiest option. - if (isAccessUnitDelimiter(&currentPos)) { - skipToNextNal(&currentPos); - } + // If an AUD NAL is prepended to this frame data, remove it. + // Other parts of this code are not prepared to deal with a + // NAL of that type, so stripping it is the easiest option. + if (isAccessUnitDelimiter(&currentPos)) { + skipToNextNal(&currentPos); + } - // There may be one or more SEI NAL units prepended to the - // frame data *after* the (optional) AUD. - while (isSeiNal(&currentPos)) { - skipToNextNal(&currentPos); + // There may be one or more SEI NAL units prepended to the + // frame data *after* the (optional) AUD. + while (isSeiNal(&currentPos)) { + skipToNextNal(&currentPos); + } } } - if (firstPacket && isIdrFrameStart(&currentPos)) - { - // SPS and PPS prefix is padded between NALs, so we must decode it with the slow path - processRtpPayloadSlow(&currentPos, existingEntry); - } - else - { - // Intel's H.264 Media Foundation encoder prepends a PPS to each P-frame. - // Skip it to avoid confusing clients. 
- if (firstPacket && isPictureParameterSetNal(&currentPos)) { - skipToNextNal(&currentPos); + if (NegotiatedVideoFormat & (VIDEO_FORMAT_MASK_H264 | VIDEO_FORMAT_MASK_H265)) { + if (firstPacket && isIdrFrameStart(&currentPos)) { + // SPS and PPS prefix is padded between NALs, so we must decode it with the slow path + processAvcHevcRtpPayloadSlow(&currentPos, existingEntry); } + else { + // Intel's H.264 Media Foundation encoder prepends a PPS to each P-frame. + // Skip it to avoid confusing clients. + if (firstPacket && isPictureParameterSetNal(&currentPos)) { + skipToNextNal(&currentPos); + } #ifdef FORCE_3_BYTE_START_SEQUENCES - if (firstPacket) { - currentPos.offset++; - currentPos.length--; - } + if (firstPacket) { + currentPos.offset++; + currentPos.length--; + } #endif + + queueFragment(existingEntry, currentPos.data, currentPos.offset, currentPos.length); + } + } + else { + // Other codecs are just passed through as is. queueFragment(existingEntry, currentPos.data, currentPos.offset, currentPos.length); } @@ -991,7 +1017,7 @@ static void processRtpPayload(PNV_VIDEO_PACKET videoPacket, int length, // depacketizer will next try to process a non-SOF packet, // and cause it to assert. if (dropStatePending) { - if (nalChainHead && nalChainHead->bufferType != BUFFER_TYPE_PICDATA) { + if (nalChainHead && frameType == FRAME_TYPE_IDR) { // Don't drop the frame state if this frame is an IDR frame itself, // otherwise we'll lose this IDR frame without another in flight // and have to wait until we hit our consecutive drop limit to