Optimize EGLRenderer with overlay VAOs and reduced GL_BLEND usage

This commit is contained in:
Cameron Gutman 2025-12-28 13:14:16 -06:00
commit c5b7a9c679
2 changed files with 123 additions and 98 deletions

View file

@ -18,11 +18,11 @@
#define GL_UNPACK_ROW_LENGTH_EXT 0x0CF2 #define GL_UNPACK_ROW_LENGTH_EXT 0x0CF2
#endif #endif
typedef struct _OVERLAY_VERTEX typedef struct _VERTEX
{ {
float x, y; float x, y;
float u, v; float u, v;
} OVERLAY_VERTEX, *POVERLAY_VERTEX; } VERTEX, *PVERTEX;
/* TODO: /* TODO:
* - handle more pixel formats * - handle more pixel formats
@ -54,14 +54,15 @@ EGLRenderer::EGLRenderer(IFFmpegRenderer *backendRenderer)
m_EGLDisplay(EGL_NO_DISPLAY), m_EGLDisplay(EGL_NO_DISPLAY),
m_Textures{0}, m_Textures{0},
m_OverlayTextures{0}, m_OverlayTextures{0},
m_OverlayVbos{0}, m_OverlayVBOs{0},
m_OverlayVAOs{0},
m_OverlayHasValidData{}, m_OverlayHasValidData{},
m_ShaderProgram(0), m_ShaderProgram(0),
m_OverlayShaderProgram(0), m_OverlayShaderProgram(0),
m_Context(0), m_Context(0),
m_Window(nullptr), m_Window(nullptr),
m_Backend(backendRenderer), m_Backend(backendRenderer),
m_VAO(0), m_VideoVAO(0),
m_BlockingSwapBuffers(false), m_BlockingSwapBuffers(false),
m_LastRenderSync(EGL_NO_SYNC), m_LastRenderSync(EGL_NO_SYNC),
m_LastFrame(av_frame_alloc()), m_LastFrame(av_frame_alloc()),
@ -97,23 +98,18 @@ EGLRenderer::~EGLRenderer()
if (m_OverlayShaderProgram) { if (m_OverlayShaderProgram) {
glDeleteProgram(m_OverlayShaderProgram); glDeleteProgram(m_OverlayShaderProgram);
} }
if (m_VAO) { if (m_VideoVAO) {
SDL_assert(m_glDeleteVertexArraysOES != nullptr); SDL_assert(m_glDeleteVertexArraysOES != nullptr);
m_glDeleteVertexArraysOES(1, &m_VAO); m_glDeleteVertexArraysOES(1, &m_VideoVAO);
} }
for (int i = 0; i < EGL_MAX_PLANES; i++) { glDeleteTextures(EGL_MAX_PLANES, m_Textures);
if (m_Textures[i] != 0) {
glDeleteTextures(1, &m_Textures[i]); glDeleteTextures(Overlay::OverlayMax, m_OverlayTextures);
} glDeleteBuffers(Overlay::OverlayMax, m_OverlayVBOs);
} if (m_glDeleteVertexArraysOES) {
for (int i = 0; i < Overlay::OverlayMax; i++) { m_glDeleteVertexArraysOES(Overlay::OverlayMax, m_OverlayVAOs);
if (m_OverlayTextures[i] != 0) {
glDeleteTextures(1, &m_OverlayTextures[i]);
}
if (m_OverlayVbos[i] != 0) {
glDeleteBuffers(1, &m_OverlayVbos[i]);
}
} }
SDL_GL_DeleteContext(m_Context); SDL_GL_DeleteContext(m_Context);
} }
@ -179,24 +175,27 @@ void EGLRenderer::renderOverlay(Overlay::OverlayType type, int viewportWidth, in
glBindTexture(GL_TEXTURE_2D, m_OverlayTextures[type]); glBindTexture(GL_TEXTURE_2D, m_OverlayTextures[type]);
// If the pixel data isn't tightly packed, it requires special handling
void* packedPixelData = nullptr; void* packedPixelData = nullptr;
if (m_GlesMajorVersion >= 3 || m_HasExtUnpackSubimage) { if (newSurface->pitch != newSurface->w * newSurface->format->BytesPerPixel) {
// If we are GLES 3.0+ or have GL_EXT_unpack_subimage, GL can handle any pitch if (m_GlesMajorVersion >= 3 || m_HasExtUnpackSubimage) {
SDL_assert(newSurface->pitch % newSurface->format->BytesPerPixel == 0); // If we are GLES 3.0+ or have GL_EXT_unpack_subimage, GL can handle any pitch
glPixelStorei(GL_UNPACK_ROW_LENGTH_EXT, newSurface->pitch / newSurface->format->BytesPerPixel); SDL_assert(newSurface->pitch % newSurface->format->BytesPerPixel == 0);
} glPixelStorei(GL_UNPACK_ROW_LENGTH_EXT, newSurface->pitch / newSurface->format->BytesPerPixel);
else if (newSurface->pitch != newSurface->w * newSurface->format->BytesPerPixel) {
// If we can't use GL_UNPACK_ROW_LENGTH and the surface isn't tightly packed,
// we must allocate a tightly packed buffer and copy our pixels there.
packedPixelData = malloc(newSurface->w * newSurface->h * newSurface->format->BytesPerPixel);
if (!packedPixelData) {
SDL_FreeSurface(newSurface);
return;
} }
else {
// If we can't use GL_UNPACK_ROW_LENGTH, we must allocate a tightly packed buffer
// and copy our pixels there.
packedPixelData = malloc(newSurface->w * newSurface->h * newSurface->format->BytesPerPixel);
if (!packedPixelData) {
SDL_FreeSurface(newSurface);
return;
}
SDL_ConvertPixels(newSurface->w, newSurface->h, SDL_ConvertPixels(newSurface->w, newSurface->h,
newSurface->format->format, newSurface->pixels, newSurface->pitch, newSurface->format->format, newSurface->pixels, newSurface->pitch,
newSurface->format->format, packedPixelData, newSurface->w * newSurface->format->BytesPerPixel); newSurface->format->format, packedPixelData, newSurface->w * newSurface->format->BytesPerPixel);
}
} }
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, newSurface->w, newSurface->h, 0, GL_RGBA, GL_UNSIGNED_BYTE, glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, newSurface->w, newSurface->h, 0, GL_RGBA, GL_UNSIGNED_BYTE,
@ -205,6 +204,9 @@ void EGLRenderer::renderOverlay(Overlay::OverlayType type, int viewportWidth, in
if (packedPixelData) { if (packedPixelData) {
free(packedPixelData); free(packedPixelData);
} }
else if (newSurface->pitch != newSurface->w * newSurface->format->BytesPerPixel) {
glPixelStorei(GL_UNPACK_ROW_LENGTH_EXT, 0);
}
SDL_FRect overlayRect; SDL_FRect overlayRect;
@ -231,7 +233,7 @@ void EGLRenderer::renderOverlay(Overlay::OverlayType type, int viewportWidth, in
// Convert screen space to normalized device coordinates // Convert screen space to normalized device coordinates
StreamUtils::screenSpaceToNormalizedDeviceCoords(&overlayRect, viewportWidth, viewportHeight); StreamUtils::screenSpaceToNormalizedDeviceCoords(&overlayRect, viewportWidth, viewportHeight);
OVERLAY_VERTEX verts[] = VERTEX verts[] =
{ {
{overlayRect.x + overlayRect.w, overlayRect.y + overlayRect.h, 1.0f, 0.0f}, {overlayRect.x + overlayRect.w, overlayRect.y + overlayRect.h, 1.0f, 0.0f},
{overlayRect.x, overlayRect.y + overlayRect.h, 0.0f, 0.0f}, {overlayRect.x, overlayRect.y + overlayRect.h, 0.0f, 0.0f},
@ -241,7 +243,8 @@ void EGLRenderer::renderOverlay(Overlay::OverlayType type, int viewportWidth, in
{overlayRect.x + overlayRect.w, overlayRect.y + overlayRect.h, 1.0f, 0.0f} {overlayRect.x + overlayRect.w, overlayRect.y + overlayRect.h, 1.0f, 0.0f}
}; };
glBindBuffer(GL_ARRAY_BUFFER, m_OverlayVbos[type]); // Update the VBO for this overlay (already bound to a VAO)
glBindBuffer(GL_ARRAY_BUFFER, m_OverlayVBOs[type]);
glBufferData(GL_ARRAY_BUFFER, sizeof(verts), verts, GL_STATIC_DRAW); glBufferData(GL_ARRAY_BUFFER, sizeof(verts), verts, GL_STATIC_DRAW);
SDL_AtomicSet(&m_OverlayHasValidData[type], 1); SDL_AtomicSet(&m_OverlayHasValidData[type], 1);
@ -257,17 +260,18 @@ void EGLRenderer::renderOverlay(Overlay::OverlayType type, int viewportWidth, in
glUseProgram(m_OverlayShaderProgram); glUseProgram(m_OverlayShaderProgram);
// compileShader() ensures that aPosition and aTexCoord are indexes 0 and 1 respectively
glBindBuffer(GL_ARRAY_BUFFER, m_OverlayVbos[type]);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(OVERLAY_VERTEX), (void*)offsetof(OVERLAY_VERTEX, x));
glEnableVertexAttribArray(0);
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, sizeof(OVERLAY_VERTEX), (void*)offsetof(OVERLAY_VERTEX, u));
glEnableVertexAttribArray(1);
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, m_OverlayTextures[type]); glBindTexture(GL_TEXTURE_2D, m_OverlayTextures[type]);
// Temporarily enable blending to draw the overlays with alpha
glEnable(GL_BLEND);
// Draw the overlay
m_glBindVertexArrayOES(m_OverlayVAOs[type]);
glDrawArrays(GL_TRIANGLES, 0, 6); glDrawArrays(GL_TRIANGLES, 0, 6);
m_glBindVertexArrayOES(0);
glDisable(GL_BLEND);
} }
int EGLRenderer::loadAndBuildShader(int shaderType, int EGLRenderer::loadAndBuildShader(int shaderType,
@ -611,28 +615,10 @@ bool EGLRenderer::initialize(PDECODER_PARAMETERS params)
SDL_GL_SetSwapInterval(0); SDL_GL_SetSwapInterval(0);
} }
glGenTextures(EGL_MAX_PLANES, m_Textures); if (!setupVideoRenderingState() || !setupOverlayRenderingState()) {
for (size_t i = 0; i < EGL_MAX_PLANES; ++i) { return false;
glBindTexture(GL_TEXTURE_EXTERNAL_OES, m_Textures[i]);
glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
} }
glGenBuffers(Overlay::OverlayMax, m_OverlayVbos);
glGenTextures(Overlay::OverlayMax, m_OverlayTextures);
for (size_t i = 0; i < Overlay::OverlayMax; ++i) {
glBindTexture(GL_TEXTURE_2D, m_OverlayTextures[i]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
GLenum err = glGetError(); GLenum err = glGetError();
if (err != GL_NO_ERROR) if (err != GL_NO_ERROR)
EGL_LOG(Error, "OpenGL error: %d", err); EGL_LOG(Error, "OpenGL error: %d", err);
@ -643,52 +629,87 @@ bool EGLRenderer::initialize(PDECODER_PARAMETERS params)
return err == GL_NO_ERROR; return err == GL_NO_ERROR;
} }
bool EGLRenderer::specialize() { bool EGLRenderer::setupVideoRenderingState() {
SDL_assert(!m_VAO); // Setup the video plane textures
glGenTextures(EGL_MAX_PLANES, m_Textures);
if (!compileShaders()) for (size_t i = 0; i < EGL_MAX_PLANES; ++i) {
return false; glBindTexture(GL_TEXTURE_EXTERNAL_OES, m_Textures[i]);
glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
// The viewport should have the aspect ratio of the video stream // The viewport should have the aspect ratio of the video stream
static const float vertices[] = { static const VERTEX vertices[] = {
// pos .... // tex coords // pos .... // tex coords
1.0f, 1.0f, 1.0f, 0.0f, { 1.0f, 1.0f, 1.0f, 0.0f },
1.0f, -1.0f, 1.0f, 1.0f, { -1.0f, 1.0f, 0.0f, 0.0f },
-1.0f, -1.0f, 0.0f, 1.0f, { -1.0f, -1.0f, 0.0f, 1.0f },
-1.0f, 1.0f, 0.0f, 0.0f, { -1.0f, -1.0f, 0.0f, 1.0f },
{ 1.0f, -1.0f, 1.0f, 1.0f },
}; { 1.0f, 1.0f, 1.0f, 0.0f },
static const unsigned int indices[] = {
0, 1, 3,
1, 2, 3,
}; };
glUseProgram(m_ShaderProgram); // Setup the VAO and VBO
unsigned int VBO;
unsigned int VBO, EBO; m_glGenVertexArraysOES(1, &m_VideoVAO);
m_glGenVertexArraysOES(1, &m_VAO);
glGenBuffers(1, &VBO); glGenBuffers(1, &VBO);
glGenBuffers(1, &EBO);
m_glBindVertexArrayOES(m_VAO); m_glBindVertexArrayOES(m_VideoVAO);
glBindBuffer(GL_ARRAY_BUFFER, VBO); glBindBuffer(GL_ARRAY_BUFFER, VBO);
glBufferData(GL_ARRAY_BUFFER, sizeof (vertices), vertices, GL_STATIC_DRAW); glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, EBO);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof (indices), indices, GL_STATIC_DRAW);
// compileShader() ensures that aPosition and aTexCoord are indexes 0 and 1 respectively // compileShader() ensures that aPosition and aTexCoord are indexes 0 and 1 respectively
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)0); glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)offsetof(VERTEX, x));
glEnableVertexAttribArray(0); glEnableVertexAttribArray(0);
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)(2 * sizeof (float))); glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)offsetof(VERTEX, u));
glEnableVertexAttribArray(1); glEnableVertexAttribArray(1);
glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ARRAY_BUFFER, 0);
m_glBindVertexArrayOES(0); m_glBindVertexArrayOES(0);
glDeleteBuffers(1, &VBO); glDeleteBuffers(1, &VBO);
glDeleteBuffers(1, &EBO);
GLenum err = glGetError();
if (err != GL_NO_ERROR) {
EGL_LOG(Error, "OpenGL error: %d", err);
}
return err == GL_NO_ERROR;
}
bool EGLRenderer::setupOverlayRenderingState() {
// Create overlay textures, VBOs, and VAOs
glGenBuffers(Overlay::OverlayMax, m_OverlayVBOs);
glGenTextures(Overlay::OverlayMax, m_OverlayTextures);
m_glGenVertexArraysOES(Overlay::OverlayMax, m_OverlayVAOs);
for (size_t i = 0; i < Overlay::OverlayMax; ++i) {
// Set up the overlay texture
glBindTexture(GL_TEXTURE_2D, m_OverlayTextures[i]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
// Create the VAO for the overlay
m_glBindVertexArrayOES(m_OverlayVAOs[i]);
glBindBuffer(GL_ARRAY_BUFFER, m_OverlayVBOs[i]);
// compileShader() ensures that aPosition and aTexCoord are indexes 0 and 1 respectively
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)offsetof(VERTEX, x));
glEnableVertexAttribArray(0);
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)offsetof(VERTEX, u));
glEnableVertexAttribArray(1);
glBindBuffer(GL_ARRAY_BUFFER, 0);
m_glBindVertexArrayOES(0);
}
// Configure the alpha blending function (GL_BLEND itself is only enabled while drawing overlays)
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
GLenum err = glGetError(); GLenum err = glGetError();
if (err != GL_NO_ERROR) { if (err != GL_NO_ERROR) {
@ -754,10 +775,11 @@ void EGLRenderer::renderFrame(AVFrame* frame)
SDL_assert(m_EGLImagePixelFormat != AV_PIX_FMT_NONE); SDL_assert(m_EGLImagePixelFormat != AV_PIX_FMT_NONE);
if (!specialize()) { // Now that we know the image format, we can compile the shaders
if (!compileShaders()) {
m_EGLImagePixelFormat = AV_PIX_FMT_NONE; m_EGLImagePixelFormat = AV_PIX_FMT_NONE;
// Failure to specialize is fatal. We must reset the renderer // Failure to compile shaders is fatal. We must reset the renderer
// to recover successfully. // to recover successfully.
// //
// Note: This seems to be easy to trigger when transitioning from // Note: This seems to be easy to trigger when transitioning from
@ -797,7 +819,6 @@ void EGLRenderer::renderFrame(AVFrame* frame)
glViewport(dst.x, dst.y, dst.w, dst.h); glViewport(dst.x, dst.y, dst.w, dst.h);
glUseProgram(m_ShaderProgram); glUseProgram(m_ShaderProgram);
m_glBindVertexArrayOES(m_VAO);
// If the frame format has changed, we'll need to recompute the constants // If the frame format has changed, we'll need to recompute the constants
if (hasFrameFormatChanged(frame) && (m_EGLImagePixelFormat == AV_PIX_FMT_NV12 || m_EGLImagePixelFormat == AV_PIX_FMT_P010)) { if (hasFrameFormatChanged(frame) && (m_EGLImagePixelFormat == AV_PIX_FMT_NV12 || m_EGLImagePixelFormat == AV_PIX_FMT_P010)) {
@ -815,10 +836,12 @@ void EGLRenderer::renderFrame(AVFrame* frame)
glUniform2fv(m_ShaderProgramParams[NV12_PARAM_CHROMA_OFFSET], 1, chromaOffset.data()); glUniform2fv(m_ShaderProgramParams[NV12_PARAM_CHROMA_OFFSET], 1, chromaOffset.data());
} }
glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0); // Draw the video
m_glBindVertexArrayOES(m_VideoVAO);
glDrawArrays(GL_TRIANGLES, 0, 6);
m_glBindVertexArrayOES(0); m_glBindVertexArrayOES(0);
// Draw overlays on top
for (int i = 0; i < Overlay::OverlayMax; i++) { for (int i = 0; i < Overlay::OverlayMax; i++) {
renderOverlay((Overlay::OverlayType)i, drawableWidth, drawableHeight); renderOverlay((Overlay::OverlayType)i, drawableWidth, drawableHeight);
} }

View file

@ -27,21 +27,23 @@ private:
void renderOverlay(Overlay::OverlayType type, int viewportWidth, int viewportHeight); void renderOverlay(Overlay::OverlayType type, int viewportWidth, int viewportHeight);
unsigned compileShader(const char* vertexShaderSrc, const char* fragmentShaderSrc); unsigned compileShader(const char* vertexShaderSrc, const char* fragmentShaderSrc);
bool compileShaders(); bool compileShaders();
bool specialize(); bool setupVideoRenderingState();
bool setupOverlayRenderingState();
static int loadAndBuildShader(int shaderType, const char *filename); static int loadAndBuildShader(int shaderType, const char *filename);
AVPixelFormat m_EGLImagePixelFormat; AVPixelFormat m_EGLImagePixelFormat;
void *m_EGLDisplay; void *m_EGLDisplay;
unsigned m_Textures[EGL_MAX_PLANES]; unsigned m_Textures[EGL_MAX_PLANES];
unsigned m_OverlayTextures[Overlay::OverlayMax]; unsigned m_OverlayTextures[Overlay::OverlayMax];
unsigned m_OverlayVbos[Overlay::OverlayMax]; unsigned m_OverlayVBOs[Overlay::OverlayMax];
unsigned m_OverlayVAOs[Overlay::OverlayMax];
SDL_atomic_t m_OverlayHasValidData[Overlay::OverlayMax]; SDL_atomic_t m_OverlayHasValidData[Overlay::OverlayMax];
unsigned m_ShaderProgram; unsigned m_ShaderProgram;
unsigned m_OverlayShaderProgram; unsigned m_OverlayShaderProgram;
SDL_GLContext m_Context; SDL_GLContext m_Context;
SDL_Window *m_Window; SDL_Window *m_Window;
IFFmpegRenderer *m_Backend; IFFmpegRenderer *m_Backend;
unsigned int m_VAO; unsigned int m_VideoVAO;
bool m_BlockingSwapBuffers; bool m_BlockingSwapBuffers;
EGLSync m_LastRenderSync; EGLSync m_LastRenderSync;
AVFrame* m_LastFrame; AVFrame* m_LastFrame;