From c21038af88e086f947cfbdd14fbc162fed8b83c3 Mon Sep 17 00:00:00 2001 From: loki Date: Wed, 8 Apr 2020 02:15:08 +0300 Subject: [PATCH] Encode video with nvenc --- sunshine/platform/common.h | 4 ++ sunshine/platform/windows_dxgi.cpp | 76 +++++++++++++++++++++--------- sunshine/video.cpp | 64 ++++++++++++++----------- 3 files changed, 93 insertions(+), 51 deletions(-) diff --git a/sunshine/platform/common.h b/sunshine/platform/common.h index 2028a554..60d791b0 100644 --- a/sunshine/platform/common.h +++ b/sunshine/platform/common.h @@ -6,6 +6,7 @@ #define SUNSHINE_COMMON_H #include +#include #include "sunshine/utility.h" struct sockaddr; @@ -66,6 +67,9 @@ public: struct hwdevice_ctx_t { void *hwdevice {}; + // Could be nullptr, depends on the encoder + std::shared_ptr lock; + virtual const platf::img_t*const convert(platf::img_t &img) { return nullptr; } diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp index 978a2a7b..d461fa16 100644 --- a/sunshine/platform/windows_dxgi.cpp +++ b/sunshine/platform/windows_dxgi.cpp @@ -308,6 +308,7 @@ public: auto &processor_in = it->second; D3D11_VIDEO_PROCESSOR_STREAM stream { TRUE, 0, 0, 0, 0, nullptr, processor_in.get(), nullptr }; + std::lock_guard lg { *lock }; auto status = ctx->VideoProcessorBlt(processor.get(), processor_out.get(), 0, 1, &stream); if(FAILED(status)) { BOOST_LOG(error) << "Failed size and color conversion [0x"sv << util::hex(status).to_string_view() << ']'; @@ -317,9 +318,12 @@ public: return &this->img; } - int init(std::shared_ptr display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, int in_width, int in_height, int out_width, int out_height) { + int init(std::shared_ptr &lock, std::shared_ptr display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, int in_width, int in_height, int out_width, int out_height) { HRESULT status; + this->lock = lock; + std::lock_guard lg { *lock }; + video::device_t::pointer vdevice_p; status = device_p->QueryInterface(IID_ID3D11VideoDevice, (void**)&vdevice_p); if(FAILED(status)) { @@ -531,7 +535,6 @@ public: return -1; } - DXGI_ADAPTER_DESC adapter_desc; adapter->GetDesc(&adapter_desc); @@ -780,6 +783,7 @@ public: }; class display_gpu_t : public display_base_t, public std::enable_shared_from_this { +public: capture_e snapshot(::platf::img_t *img_base, bool cursor_visible) override { auto img = (img_d3d_t*)img_base; @@ -787,6 +791,7 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this DXGI_OUTDUPL_FRAME_INFO frame_info; + std::lock_guard lg { *lock }; resource_t::pointer res_p {}; auto capture_status = dup.next_frame(frame_info, &res_p); resource_t res{res_p}; @@ -800,7 +805,7 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this return capture_e::timeout; } - texture2d_t::pointer src_p{}; + texture2d_t::pointer src_p {}; status = res->QueryInterface(IID_ID3D11Texture2D, (void **)&src_p); if (FAILED(status)) { @@ -808,11 +813,9 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this return capture_e::error; } - img->row_pitch = width * 4; - img->width = width; - img->height = height; - img->data = (std::uint8_t*)src_p; - img->texture.reset(src_p); + texture2d_t src { src_p }; + + device_ctx->CopyResource(img->texture.get(), src.get()); return capture_e::ok; } @@ -820,20 +823,6 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this std::shared_ptr alloc_img() override { auto img = std::make_shared(); - img->data = nullptr; - img->row_pitch = 0; - img->pixel_pitch = 4; - img->width = 0; - img->height = 0; - img->display = shared_from_this(); - - return img; - } - - int dummy_img(platf::img_t *img_base, int &dummy_data_p) override { - auto img = (img_d3d_t*)img_base; - - img->row_pitch = width * 4; D3D11_TEXTURE2D_DESC t {}; t.Width = width; t.Height = height; @@ -843,6 +832,28 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this t.Usage = D3D11_USAGE_DEFAULT; t.Format = format; + dxgi::texture2d_t::pointer tex_p {}; + auto status = device->CreateTexture2D(&t, nullptr, &tex_p); + if(FAILED(status)) { + BOOST_LOG(error) << "Failed to create img buf texture [0x"sv << util::hex(status).to_string_view() << ']'; + return nullptr; + } + + img->data = (std::uint8_t*)tex_p; + img->row_pitch = 0; + img->pixel_pitch = 4; + img->width = 0; + img->height = 0; + img->texture.reset(tex_p); + img->display = shared_from_this(); + + return img; + } + + int dummy_img(platf::img_t *img_base, int &dummy_data_p) override { + auto img = (img_d3d_t*)img_base; + + img->row_pitch = width * 4; auto dummy_data = std::make_unique(width * height); D3D11_SUBRESOURCE_DATA data { dummy_data.get(), @@ -850,14 +861,24 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this 0 }; + D3D11_TEXTURE2D_DESC t {}; + t.Width = width; + t.Height = height; + t.MipLevels = 1; + t.ArraySize = 1; + t.SampleDesc.Count = 1; + t.Usage = D3D11_USAGE_DEFAULT; + t.Format = format; + dxgi::texture2d_t::pointer tex_p {}; auto status = device->CreateTexture2D(&t, &data, &tex_p); if(FAILED(status)) { BOOST_LOG(error) << "Failed to create dummy texture [0x"sv << util::hex(status).to_string_view() << ']'; return -1; } - img->texture.reset(tex_p); + img->data = (std::uint8_t*)tex_p; + img->texture.reset(tex_p); img->height = height; img->width = width; img->data = (std::uint8_t*)tex_p; @@ -866,10 +887,17 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this return 0; } + int init() { + lock = std::make_shared(); + std::lock_guard lg { *lock }; + return display_base_t::init(); + } + std::shared_ptr make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) override { auto hwdevice = std::make_shared(); auto ret = hwdevice->init( + lock, shared_from_this(), device.get(), device_ctx.get(), @@ -882,6 +910,8 @@ class display_gpu_t : public display_base_t, public std::enable_shared_from_this return hwdevice; } + + std::shared_ptr lock; }; const char *format_str[] = { diff --git a/sunshine/video.cpp b/sunshine/video.cpp index d2a1fa45..1d5ae97d 100644 --- a/sunshine/video.cpp +++ b/sunshine/video.cpp @@ -38,7 +38,6 @@ void free_packet(AVPacket *packet) { } using ctx_t = util::safe_ptr; -using codec_t = util::safe_ptr_v2; using frame_t = util::safe_ptr; using buffer_t = util::safe_ptr; using sws_t = util::safe_ptr; @@ -98,7 +97,6 @@ struct session_t { buffer_t hwdevice; ctx_t ctx; - codec_t codec_handle; frame_t frame; @@ -112,10 +110,10 @@ static encoder_t nvenc { AV_PIX_FMT_D3D11, AV_PIX_FMT_NV12, AV_PIX_FMT_NV12, { - { {"force-idr"s, 1} }, "hevc_nvenc"s + { {"forced-idr"s, 1} }, "hevc_nvenc"s }, { - { {"force-idr"s, 1} }, "h264_nvenc"s + { {"forced-idr"s, 1}, { "preset"s , 9} }, "h264_nvenc"s }, false, @@ -169,12 +167,6 @@ struct capture_thread_ctx_t { util::sync_t> display_wp; }; -[[nodiscard]] codec_t open_codec(ctx_t &ctx, AVCodec *codec, AVDictionary **options) { - avcodec_open2(ctx.get(), codec, options); - - return codec_t { ctx.get() }; -} - void reset_display(std::shared_ptr &disp, AVHWDeviceType type) { // We try this twice, in case we still get an error on reinitialization for(int x = 0; x < 2; ++x) { @@ -251,11 +243,8 @@ void captureThread( auto &img = *round_robin++; while(img.use_count() > 1) {} - platf::capture_e status; - { - auto lg = display_wp.lock(); - status = disp->snapshot(img.get(), display_cursor); - } + + auto status = disp->snapshot(img.get(), display_cursor); switch (status) { case platf::capture_e::reinit: { reinit_event.raise(true); @@ -552,7 +541,7 @@ std::optional make_session(const encoder_t &encoder, const config_t av_dict_set_int(&options, "qp", config::video.qp, 0); } - auto codec_handle = open_codec(ctx, codec, &options); + avcodec_open2(ctx.get(), codec, &options); frame_t frame {av_frame_alloc() }; frame->format = ctx->pix_fmt; @@ -570,7 +559,6 @@ std::optional make_session(const encoder_t &encoder, const config_t return std::make_optional(session_t { std::move(hwdevice), std::move(ctx), - std::move(codec_handle), std::move(frame), sw_fmt, sws_color_space @@ -648,7 +636,7 @@ void encode_run( 0, 1 << 16, 1 << 16); } - img_p = img; + img_p = img.get(); } else { img_p = hwdevice_ctx->convert(*img); @@ -667,7 +655,16 @@ void encode_run( } } - if(encode(frame_nr++, session->ctx, session->frame, packets, channel_data)) { + int err; + if(hwdevice_ctx && hwdevice_ctx->lock) { + std::lock_guard lg { *hwdevice_ctx->lock }; + err = encode(frame_nr++, session->ctx, session->frame, packets, channel_data); + } + else { + err = encode(frame_nr++, session->ctx, session->frame, packets, channel_data); + } + + if(err) { BOOST_LOG(fatal) << "Could not encode video packet"sv; log_flush(); std::abort(); @@ -710,20 +707,20 @@ void capture( int key_frame_nr = 1; while(!shutdown_event->peek() && images->running()) { // Wait for the display to be ready - std::shared_ptr hwdevice_ctx; + std::shared_ptr display; { auto lg = ref->display_wp.lock(); if(ref->display_wp->expired()) { continue; } - auto display = ref->display_wp->lock(); + display = ref->display_wp->lock(); + } - auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10; - hwdevice_ctx = display->make_hwdevice_ctx(config.width, config.height, pix_fmt); - if(!hwdevice_ctx) { - return; - } + auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10; + auto hwdevice_ctx = display->make_hwdevice_ctx(config.width, config.height, pix_fmt); + if(!hwdevice_ctx) { + return; } encode_run(frame_nr, key_frame_nr, shutdown_event, packets, idr_events, images, config, hwdevice_ctx.get(), ref->reinit_event, *ref->encoder_p, channel_data); @@ -886,7 +883,7 @@ void sw_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) { void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) { // Need to have something refcounted if(!frame->buf[0]) { - frame->buf[0] = av_buffer_allocz(sizeof(AVD3D11FrameDescriptor*)); + frame->buf[0] = av_buffer_allocz(sizeof(AVD3D11FrameDescriptor)); } auto desc = (AVD3D11FrameDescriptor*)frame->buf[0]->data; @@ -902,13 +899,24 @@ void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) { frame->width = img.width; } +void nvenc_lock(void *lock_p) { + ((std::recursive_mutex*)lock_p)->lock(); +} +void nvenc_unlock(void *lock_p) { + ((std::recursive_mutex*)lock_p)->unlock(); +} + util::Either nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx) { buffer_t ctx_buf { av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA) }; auto ctx = (AVD3D11VADeviceContext*)((AVHWDeviceContext*)ctx_buf->data)->hwctx; - + std::fill_n((std::uint8_t*)ctx, sizeof(AVD3D11VADeviceContext), 0); std::swap(ctx->device, *(ID3D11Device**)&hwdevice_ctx->hwdevice); + ctx->lock_ctx = hwdevice_ctx->lock.get(); + ctx->lock = nvenc_lock; + ctx->unlock = nvenc_unlock; + auto err = av_hwdevice_ctx_init(ctx_buf.get()); if(err) { char err_str[AV_ERROR_MAX_STRING_SIZE] {0};