Add standalone NVENC encoder

This commit is contained in:
ns6089 2023-04-25 16:38:37 +03:00 committed by Cameron Gutman
commit 68fa43a61c
34 changed files with 2124 additions and 642 deletions

View file

@@ -16,7 +16,11 @@ extern "C" {
#include "display.h"
#include "misc.h"
#include "src/config.h"
#include "src/main.h"
#include "src/nvenc/nvenc_config.h"
#include "src/nvenc/nvenc_d3d11.h"
#include "src/nvenc/nvenc_utils.h"
#include "src/video.h"
#define SUNSHINE_SHADERS_DIR SUNSHINE_ASSETS_DIR "/shaders/directx"
@@ -361,10 +365,10 @@ namespace platf::dxgi {
return compile_shader(file, "main_vs", "vs_5_0");
}
class hwdevice_t: public platf::hwdevice_t {
class d3d_base_encode_device final {
public:
int
convert(platf::img_t &img_base) override {
convert(platf::img_t &img_base) {
// Garbage collect mapped capture images whose weak references have expired
for (auto it = img_ctx_map.begin(); it != img_ctx_map.end();) {
if (it->second.img_weak.expired()) {
@@ -413,28 +417,15 @@
}
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
switch (colorspace) {
case 5: // SWS_CS_SMPTE170M
color_p = &::video::colors[0];
break;
case 1: // SWS_CS_ITU709
color_p = &::video::colors[2];
break;
case 9: // SWS_CS_BT2020
color_p = &::video::colors[4];
break;
default:
BOOST_LOG(warning) << "Colorspace: ["sv << colorspace << "] not yet supported: switching to default"sv;
color_p = &::video::colors[0];
};
apply_colorspace(const ::video::sunshine_colorspace_t &colorspace) {
auto color_vectors = ::video::color_vectors_from_colorspace(colorspace);
if (color_range > 1) {
// Full range
++color_p;
if (!color_vectors) {
BOOST_LOG(error) << "No vector data for colorspace"sv;
return;
}
auto color_matrix = make_buffer((device_t::pointer) data, *color_p);
auto color_matrix = make_buffer(device.get(), *color_vectors);
if (!color_matrix) {
BOOST_LOG(warning) << "Failed to create color matrix"sv;
return;
@@ -445,78 +436,14 @@ namespace platf::dxgi {
this->color_matrix = std::move(color_matrix);
}
void
init_hwframes(AVHWFramesContext *frames) override {
// We may be called with a QSV or D3D11VA context
if (frames->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) {
auto d3d11_frames = (AVD3D11VAFramesContext *) frames->hwctx;
// The encoder requires textures with D3D11_BIND_RENDER_TARGET set
d3d11_frames->BindFlags = D3D11_BIND_RENDER_TARGET;
d3d11_frames->MiscFlags = 0;
}
// We require a single texture
frames->initial_pool_size = 1;
}
int
prepare_to_derive_context(int hw_device_type) override {
// QuickSync requires our device to be multithread-protected
if (hw_device_type == AV_HWDEVICE_TYPE_QSV) {
multithread_t mt;
init_output(ID3D11Texture2D *frame_texture, int width, int height) {
// The underlying frame pool owns the texture, so we must reference it for ourselves
frame_texture->AddRef();
output_texture.reset(frame_texture);
auto status = device->QueryInterface(IID_ID3D11Multithread, (void **) &mt);
if (FAILED(status)) {
BOOST_LOG(warning) << "Failed to query ID3D11Multithread interface from device [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
mt->SetMultithreadProtected(TRUE);
}
return 0;
}
int
set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override {
this->hwframe.reset(frame);
this->frame = frame;
// Populate this frame with a hardware buffer if one isn't there already
if (!frame->buf[0]) {
auto err = av_hwframe_get_buffer(hw_frames_ctx, frame, 0);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to get hwframe buffer: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
}
// If this is a frame from a derived context, we'll need to map it to D3D11
ID3D11Texture2D *frame_texture;
if (frame->format != AV_PIX_FMT_D3D11) {
frame_t d3d11_frame { av_frame_alloc() };
d3d11_frame->format = AV_PIX_FMT_D3D11;
auto err = av_hwframe_map(d3d11_frame.get(), frame, AV_HWFRAME_MAP_WRITE | AV_HWFRAME_MAP_OVERWRITE);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to map D3D11 frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
// Get the texture from the mapped frame
frame_texture = (ID3D11Texture2D *) d3d11_frame->data[0];
}
else {
// Otherwise, we can just use the texture inside the original frame
frame_texture = (ID3D11Texture2D *) frame->data[0];
}
auto out_width = frame->width;
auto out_height = frame->height;
auto out_width = width;
auto out_height = height;
float in_width = display->width;
float in_height = display->height;
@@ -533,10 +460,6 @@ namespace platf::dxgi {
outY_view = D3D11_VIEWPORT { offsetX, offsetY, out_width_f, out_height_f, 0.0f, 1.0f };
outUV_view = D3D11_VIEWPORT { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f };
// The underlying frame pool owns the texture, so we must reference it for ourselves
frame_texture->AddRef();
hwframe_texture.reset(frame_texture);
float info_in[16 / sizeof(float)] { 1.0f / (float) out_width_f }; // aligned to 16-byte
info_scene = make_buffer(device.get(), info_in);
@@ -550,7 +473,7 @@
D3D11_RTV_DIMENSION_TEXTURE2D
};
auto status = device->CreateRenderTargetView(hwframe_texture.get(), &nv12_rt_desc, &nv12_Y_rt);
auto status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_Y_rt);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
@@ -558,7 +481,7 @@
nv12_rt_desc.Format = (format == DXGI_FORMAT_P010) ? DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM;
status = device->CreateRenderTargetView(hwframe_texture.get(), &nv12_rt_desc, &nv12_UV_rt);
status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_UV_rt);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
@@ -574,9 +497,7 @@
}
int
init(
std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p,
pix_fmt_e pix_fmt) {
init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
D3D_FEATURE_LEVEL featureLevels[] {
D3D_FEATURE_LEVEL_11_1,
D3D_FEATURE_LEVEL_11_0,
@@ -615,8 +536,6 @@ namespace platf::dxgi {
BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance.";
}
data = device.get();
format = (pix_fmt == pix_fmt_e::nv12 ? DXGI_FORMAT_NV12 : DXGI_FORMAT_P010);
status = device->CreateVertexShader(scene_vs_hlsl->GetBufferPointer(), scene_vs_hlsl->GetBufferSize(), nullptr, &scene_vs);
if (status) {
@@ -673,7 +592,13 @@
return -1;
}
color_matrix = make_buffer(device.get(), ::video::colors[0]);
auto default_color_vectors = ::video::color_vectors_from_colorspace(::video::colorspace_e::rec601, false);
if (!default_color_vectors) {
BOOST_LOG(error) << "Missing color vectors for Rec. 601"sv;
return -1;
}
color_matrix = make_buffer(device.get(), *default_color_vectors);
if (!color_matrix) {
BOOST_LOG(error) << "Failed to create color matrix buffer"sv;
return -1;
@@ -721,7 +646,6 @@ namespace platf::dxgi {
return 0;
}
private:
struct encoder_img_ctx_t {
// Used to determine if the underlying texture changes.
// Not safe for actual use by the encoder!
@@ -789,9 +713,6 @@
return 0;
}
public:
frame_t hwframe;
::video::color_t *color_p;
buf_t info_scene;
@@ -805,9 +726,6 @@
render_target_t nv12_Y_rt;
render_target_t nv12_UV_rt;
// The image referenced by hwframe
texture2d_t hwframe_texture;
// d3d_img_t::id -> encoder_img_ctx_t
// These store the encoder textures for each img_t that passes through
// convert(). We can't store them in the img_t itself because it is shared
@@ -830,6 +748,149 @@
device_t device;
device_ctx_t device_ctx;
texture2d_t output_texture;
};
// avcodec (FFmpeg) flavor of the D3D11 encode device. Delegates color
// conversion to an embedded d3d_base_encode_device and adds the
// AVFrame / AVHWFramesContext plumbing the avcodec encoding path needs.
class d3d_avcodec_encode_device_t: public avcodec_encode_device_t {
public:
// Initializes the shared conversion device and publishes its D3D11
// device pointer via `data` (inherited member — presumably consumed by
// the avcodec hwdevice setup; confirm against avcodec_encode_device_t).
// Returns 0 on success, non-zero on failure.
int
init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
int result = base.init(display, adapter_p, pix_fmt);
// NOTE(review): `data` is assigned even when base.init() failed —
// callers are expected to check the returned status first.
data = base.device.get();
return result;
}
// Converts a captured image into the output texture (delegated to base).
int
convert(platf::img_t &img_base) override {
return base.convert(img_base);
}
// Pushes the stored colorspace (member `colorspace` is inherited — not
// visible in this file) into the base device's conversion shaders.
void
apply_colorspace() override {
base.apply_colorspace(colorspace);
}
// Adjusts the FFmpeg hardware frame pool parameters before allocation.
void
init_hwframes(AVHWFramesContext *frames) override {
// We may be called with a QSV or D3D11VA context
if (frames->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) {
auto d3d11_frames = (AVD3D11VAFramesContext *) frames->hwctx;
// The encoder requires textures with D3D11_BIND_RENDER_TARGET set
d3d11_frames->BindFlags = D3D11_BIND_RENDER_TARGET;
d3d11_frames->MiscFlags = 0;
}
// We require a single texture
frames->initial_pool_size = 1;
}
// Called before FFmpeg derives a hardware context of `hw_device_type`
// from our D3D11 device. Returns 0 on success, -1 on failure.
int
prepare_to_derive_context(int hw_device_type) override {
// QuickSync requires our device to be multithread-protected
if (hw_device_type == AV_HWDEVICE_TYPE_QSV) {
multithread_t mt;
auto status = base.device->QueryInterface(IID_ID3D11Multithread, (void **) &mt);
if (FAILED(status)) {
BOOST_LOG(warning) << "Failed to query ID3D11Multithread interface from device [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
mt->SetMultithreadProtected(TRUE);
}
return 0;
}
// Takes ownership of `frame`, ensures it is backed by a hardware buffer,
// resolves the underlying D3D11 texture (mapping it first if the frame
// came from a derived, non-D3D11 context) and hands the texture to the
// base device as the conversion output. Returns 0 on success, -1 on error.
int
set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override {
this->hwframe.reset(frame);
this->frame = frame;
// Populate this frame with a hardware buffer if one isn't there already
if (!frame->buf[0]) {
auto err = av_hwframe_get_buffer(hw_frames_ctx, frame, 0);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to get hwframe buffer: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
}
// If this is a frame from a derived context, we'll need to map it to D3D11
ID3D11Texture2D *frame_texture;
if (frame->format != AV_PIX_FMT_D3D11) {
frame_t d3d11_frame { av_frame_alloc() };
d3d11_frame->format = AV_PIX_FMT_D3D11;
auto err = av_hwframe_map(d3d11_frame.get(), frame, AV_HWFRAME_MAP_WRITE | AV_HWFRAME_MAP_OVERWRITE);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to map D3D11 frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
// Get the texture from the mapped frame
// NOTE(review): the mapped d3d11_frame is destroyed at the end of
// this scope while frame_texture is used below; this assumes the
// texture's lifetime is held by the frame pool — confirm.
frame_texture = (ID3D11Texture2D *) d3d11_frame->data[0];
}
else {
// Otherwise, we can just use the texture inside the original frame
frame_texture = (ID3D11Texture2D *) frame->data[0];
}
// init_output AddRefs the texture for its own ownership
return base.init_output(frame_texture, frame->width, frame->height);
}
private:
// Shared D3D11 color-conversion device
d3d_base_encode_device base;
// Owning reference to the AVFrame passed to set_frame()
frame_t hwframe;
};
// Standalone NVENC flavor of the D3D11 encode device. Drives the NVENC
// SDK wrapper (nvenc::nvenc_d3d11) directly instead of going through
// avcodec; conversion is still delegated to d3d_base_encode_device.
class d3d_nvenc_encode_device_t: public nvenc_encode_device_t {
public:
// Creates the D3D11 conversion device and the NVENC wrapper on the same
// adapter. Returns true on success (boolean convention, unlike the
// int/0-on-success init() used by the avcodec device).
bool
init_device(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
buffer_format = nvenc::nvenc_format_from_sunshine_format(pix_fmt);
if (buffer_format == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
BOOST_LOG(error) << "Unexpected pixel format for NvENC ["sv << from_pix_fmt(pix_fmt) << ']';
return false;
}
if (base.init(display, adapter_p, pix_fmt)) return false;
nvenc_d3d = std::make_unique<nvenc::nvenc_d3d11>(base.device.get());
// `nvenc` is an inherited member — presumably the interface pointer the
// base class encodes through; confirm against nvenc_encode_device_t.
nvenc = nvenc_d3d.get();
return true;
}
// Configures and creates the NVENC encoder session for a client stream,
// then wires the base device's output to NVENC's input texture.
// Returns true on success; false if init_device() was never run or any
// step fails.
bool
init_encoder(const ::video::config_t &client_config, const ::video::sunshine_colorspace_t &colorspace) override {
if (!nvenc_d3d) return false;
nvenc::nvenc_config nvenc_config;
// Maps the configured preset onto NVENC's range (offset by 11);
// defaults to 1 when unset — confirm mapping against config docs.
nvenc_config.quality_preset = config::video.nv.nv_preset ? (*config::video.nv.nv_preset - 11) : 1;
nvenc_config.h264_cavlc = (config::video.nv.nv_coder == NV_ENC_H264_ENTROPY_CODING_MODE_CAVLC);
auto nvenc_colorspace = nvenc::nvenc_colorspace_from_sunshine_colorspace(colorspace);
if (!nvenc_d3d->create_encoder(nvenc_config, client_config, nvenc_colorspace, buffer_format)) return false;
base.apply_colorspace(colorspace);
// init_output() returns 0 on success
return base.init_output(nvenc_d3d->get_input_texture(), client_config.width, client_config.height) == 0;
}
// Converts a captured image into NVENC's input texture (delegated).
int
convert(platf::img_t &img_base) override {
return base.convert(img_base);
}
private:
// Shared D3D11 color-conversion device
d3d_base_encode_device base;
// Owning NVENC SDK wrapper bound to base's D3D11 device
std::unique_ptr<nvenc::nvenc_d3d11> nvenc_d3d;
// NVENC buffer format matching the requested pix_fmt
NV_ENC_BUFFER_FORMAT buffer_format = NV_ENC_BUFFER_FORMAT_UNDEFINED;
};
bool
@@ -1464,26 +1525,32 @@ namespace platf::dxgi {
};
}
std::shared_ptr<platf::hwdevice_t>
display_vram_t::make_hwdevice(pix_fmt_e pix_fmt) {
std::unique_ptr<avcodec_encode_device_t>
display_vram_t::make_avcodec_encode_device(pix_fmt_e pix_fmt) {
if (pix_fmt != platf::pix_fmt_e::nv12 && pix_fmt != platf::pix_fmt_e::p010) {
BOOST_LOG(error) << "display_vram_t doesn't support pixel format ["sv << from_pix_fmt(pix_fmt) << ']';
return nullptr;
}
auto hwdevice = std::make_shared<hwdevice_t>();
auto device = std::make_unique<d3d_avcodec_encode_device_t>();
auto ret = hwdevice->init(
shared_from_this(),
adapter.get(),
pix_fmt);
auto ret = device->init(shared_from_this(), adapter.get(), pix_fmt);
if (ret) {
return nullptr;
}
return hwdevice;
return device;
}
// Factory for the standalone NVENC encode device.
// Returns nullptr when the device fails to initialize.
std::unique_ptr<nvenc_encode_device_t>
display_vram_t::make_nvenc_encode_device(pix_fmt_e pix_fmt) {
  auto encode_device = std::make_unique<d3d_nvenc_encode_device_t>();
  if (encode_device->init_device(shared_from_this(), adapter.get(), pix_fmt)) {
    return encode_device;
  }
  return nullptr;
}
int