Add standalone NVENC encoder

This commit is contained in:
ns6089 2023-04-25 16:38:37 +03:00 committed by Cameron Gutman
commit 68fa43a61c
34 changed files with 2124 additions and 642 deletions

View file

@@ -16,7 +16,11 @@ extern "C" {
#include "display.h"
#include "misc.h"
#include "src/config.h"
#include "src/main.h"
#include "src/nvenc/nvenc_config.h"
#include "src/nvenc/nvenc_d3d11.h"
#include "src/nvenc/nvenc_utils.h"
#include "src/video.h"
#define SUNSHINE_SHADERS_DIR SUNSHINE_ASSETS_DIR "/shaders/directx"
@@ -361,10 +365,10 @@ namespace platf::dxgi {
return compile_shader(file, "main_vs", "vs_5_0");
}
class hwdevice_t: public platf::hwdevice_t {
class d3d_base_encode_device final {
public:
int
convert(platf::img_t &img_base) override {
convert(platf::img_t &img_base) {
// Garbage collect mapped capture images whose weak references have expired
for (auto it = img_ctx_map.begin(); it != img_ctx_map.end();) {
if (it->second.img_weak.expired()) {
@@ -413,28 +417,15 @@
}
void
set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override {
switch (colorspace) {
case 5: // SWS_CS_SMPTE170M
color_p = &::video::colors[0];
break;
case 1: // SWS_CS_ITU709
color_p = &::video::colors[2];
break;
case 9: // SWS_CS_BT2020
color_p = &::video::colors[4];
break;
default:
BOOST_LOG(warning) << "Colorspace: ["sv << colorspace << "] not yet supported: switching to default"sv;
color_p = &::video::colors[0];
};
apply_colorspace(const ::video::sunshine_colorspace_t &colorspace) {
auto color_vectors = ::video::color_vectors_from_colorspace(colorspace);
if (color_range > 1) {
// Full range
++color_p;
if (!color_vectors) {
BOOST_LOG(error) << "No vector data for colorspace"sv;
return;
}
auto color_matrix = make_buffer((device_t::pointer) data, *color_p);
auto color_matrix = make_buffer(device.get(), *color_vectors);
if (!color_matrix) {
BOOST_LOG(warning) << "Failed to create color matrix"sv;
return;
@@ -445,78 +436,14 @@ namespace platf::dxgi {
this->color_matrix = std::move(color_matrix);
}
void
init_hwframes(AVHWFramesContext *frames) override {
// We may be called with a QSV or D3D11VA context
if (frames->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) {
auto d3d11_frames = (AVD3D11VAFramesContext *) frames->hwctx;
// The encoder requires textures with D3D11_BIND_RENDER_TARGET set
d3d11_frames->BindFlags = D3D11_BIND_RENDER_TARGET;
d3d11_frames->MiscFlags = 0;
}
// We require a single texture
frames->initial_pool_size = 1;
}
int
prepare_to_derive_context(int hw_device_type) override {
// QuickSync requires our device to be multithread-protected
if (hw_device_type == AV_HWDEVICE_TYPE_QSV) {
multithread_t mt;
init_output(ID3D11Texture2D *frame_texture, int width, int height) {
// The underlying frame pool owns the texture, so we must reference it for ourselves
frame_texture->AddRef();
output_texture.reset(frame_texture);
auto status = device->QueryInterface(IID_ID3D11Multithread, (void **) &mt);
if (FAILED(status)) {
BOOST_LOG(warning) << "Failed to query ID3D11Multithread interface from device [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
mt->SetMultithreadProtected(TRUE);
}
return 0;
}
int
set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override {
this->hwframe.reset(frame);
this->frame = frame;
// Populate this frame with a hardware buffer if one isn't there already
if (!frame->buf[0]) {
auto err = av_hwframe_get_buffer(hw_frames_ctx, frame, 0);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to get hwframe buffer: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
}
// If this is a frame from a derived context, we'll need to map it to D3D11
ID3D11Texture2D *frame_texture;
if (frame->format != AV_PIX_FMT_D3D11) {
frame_t d3d11_frame { av_frame_alloc() };
d3d11_frame->format = AV_PIX_FMT_D3D11;
auto err = av_hwframe_map(d3d11_frame.get(), frame, AV_HWFRAME_MAP_WRITE | AV_HWFRAME_MAP_OVERWRITE);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to map D3D11 frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
// Get the texture from the mapped frame
frame_texture = (ID3D11Texture2D *) d3d11_frame->data[0];
}
else {
// Otherwise, we can just use the texture inside the original frame
frame_texture = (ID3D11Texture2D *) frame->data[0];
}
auto out_width = frame->width;
auto out_height = frame->height;
auto out_width = width;
auto out_height = height;
float in_width = display->width;
float in_height = display->height;
@@ -533,10 +460,6 @@ namespace platf::dxgi {
outY_view = D3D11_VIEWPORT { offsetX, offsetY, out_width_f, out_height_f, 0.0f, 1.0f };
outUV_view = D3D11_VIEWPORT { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f };
// The underlying frame pool owns the texture, so we must reference it for ourselves
frame_texture->AddRef();
hwframe_texture.reset(frame_texture);
float info_in[16 / sizeof(float)] { 1.0f / (float) out_width_f }; // aligned to 16-byte
info_scene = make_buffer(device.get(), info_in);
@@ -550,7 +473,7 @@
D3D11_RTV_DIMENSION_TEXTURE2D
};
auto status = device->CreateRenderTargetView(hwframe_texture.get(), &nv12_rt_desc, &nv12_Y_rt);
auto status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_Y_rt);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
@@ -558,7 +481,7 @@
nv12_rt_desc.Format = (format == DXGI_FORMAT_P010) ? DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM;
status = device->CreateRenderTargetView(hwframe_texture.get(), &nv12_rt_desc, &nv12_UV_rt);
status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_UV_rt);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
@@ -574,9 +497,7 @@
}
int
init(
std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p,
pix_fmt_e pix_fmt) {
init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
D3D_FEATURE_LEVEL featureLevels[] {
D3D_FEATURE_LEVEL_11_1,
D3D_FEATURE_LEVEL_11_0,
@@ -615,8 +536,6 @@ namespace platf::dxgi {
BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance.";
}
data = device.get();
format = (pix_fmt == pix_fmt_e::nv12 ? DXGI_FORMAT_NV12 : DXGI_FORMAT_P010);
status = device->CreateVertexShader(scene_vs_hlsl->GetBufferPointer(), scene_vs_hlsl->GetBufferSize(), nullptr, &scene_vs);
if (status) {
@@ -673,7 +592,13 @@
return -1;
}
color_matrix = make_buffer(device.get(), ::video::colors[0]);
auto default_color_vectors = ::video::color_vectors_from_colorspace(::video::colorspace_e::rec601, false);
if (!default_color_vectors) {
BOOST_LOG(error) << "Missing color vectors for Rec. 601"sv;
return -1;
}
color_matrix = make_buffer(device.get(), *default_color_vectors);
if (!color_matrix) {
BOOST_LOG(error) << "Failed to create color matrix buffer"sv;
return -1;
@@ -721,7 +646,6 @@ namespace platf::dxgi {
return 0;
}
private:
struct encoder_img_ctx_t {
// Used to determine if the underlying texture changes.
// Not safe for actual use by the encoder!
@@ -789,9 +713,6 @@
return 0;
}
public:
frame_t hwframe;
::video::color_t *color_p;
buf_t info_scene;
@@ -805,9 +726,6 @@
render_target_t nv12_Y_rt;
render_target_t nv12_UV_rt;
// The image referenced by hwframe
texture2d_t hwframe_texture;
// d3d_img_t::id -> encoder_img_ctx_t
// These store the encoder textures for each img_t that passes through
// convert(). We can't store them in the img_t itself because it is shared
@@ -830,6 +748,149 @@
device_t device;
device_ctx_t device_ctx;
texture2d_t output_texture;
};
// avcodec (FFmpeg) flavor of the D3D11 encode device. Delegates color
// conversion to an embedded d3d_base_encode_device and adds the
// AVFrame / AVHWFramesContext plumbing the avcodec encoding path needs.
class d3d_avcodec_encode_device_t: public avcodec_encode_device_t {
public:
// Initializes the shared conversion device and publishes its D3D11
// device pointer via `data` (inherited member — presumably consumed by
// the avcodec hwdevice setup; confirm against avcodec_encode_device_t).
// Returns 0 on success, non-zero on failure.
int
init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
int result = base.init(display, adapter_p, pix_fmt);
// NOTE(review): `data` is assigned even when base.init() failed —
// callers are expected to check the returned status first.
data = base.device.get();
return result;
}
// Converts a captured image into the output texture (delegated to base).
int
convert(platf::img_t &img_base) override {
return base.convert(img_base);
}
// Pushes the stored colorspace (member `colorspace` is inherited — not
// visible in this file) into the base device's conversion shaders.
void
apply_colorspace() override {
base.apply_colorspace(colorspace);
}
// Adjusts the FFmpeg hardware frame pool parameters before allocation.
void
init_hwframes(AVHWFramesContext *frames) override {
// We may be called with a QSV or D3D11VA context
if (frames->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) {
auto d3d11_frames = (AVD3D11VAFramesContext *) frames->hwctx;
// The encoder requires textures with D3D11_BIND_RENDER_TARGET set
d3d11_frames->BindFlags = D3D11_BIND_RENDER_TARGET;
d3d11_frames->MiscFlags = 0;
}
// We require a single texture
frames->initial_pool_size = 1;
}
// Called before FFmpeg derives a hardware context of `hw_device_type`
// from our D3D11 device. Returns 0 on success, -1 on failure.
int
prepare_to_derive_context(int hw_device_type) override {
// QuickSync requires our device to be multithread-protected
if (hw_device_type == AV_HWDEVICE_TYPE_QSV) {
multithread_t mt;
auto status = base.device->QueryInterface(IID_ID3D11Multithread, (void **) &mt);
if (FAILED(status)) {
BOOST_LOG(warning) << "Failed to query ID3D11Multithread interface from device [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}
mt->SetMultithreadProtected(TRUE);
}
return 0;
}
// Takes ownership of `frame`, ensures it is backed by a hardware buffer,
// resolves the underlying D3D11 texture (mapping it first if the frame
// came from a derived, non-D3D11 context) and hands the texture to the
// base device as the conversion output. Returns 0 on success, -1 on error.
int
set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override {
this->hwframe.reset(frame);
this->frame = frame;
// Populate this frame with a hardware buffer if one isn't there already
if (!frame->buf[0]) {
auto err = av_hwframe_get_buffer(hw_frames_ctx, frame, 0);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to get hwframe buffer: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
}
// If this is a frame from a derived context, we'll need to map it to D3D11
ID3D11Texture2D *frame_texture;
if (frame->format != AV_PIX_FMT_D3D11) {
frame_t d3d11_frame { av_frame_alloc() };
d3d11_frame->format = AV_PIX_FMT_D3D11;
auto err = av_hwframe_map(d3d11_frame.get(), frame, AV_HWFRAME_MAP_WRITE | AV_HWFRAME_MAP_OVERWRITE);
if (err) {
char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
BOOST_LOG(error) << "Failed to map D3D11 frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
return -1;
}
// Get the texture from the mapped frame
// NOTE(review): the mapped d3d11_frame is destroyed at the end of
// this scope while frame_texture is used below; this assumes the
// texture's lifetime is held by the frame pool — confirm.
frame_texture = (ID3D11Texture2D *) d3d11_frame->data[0];
}
else {
// Otherwise, we can just use the texture inside the original frame
frame_texture = (ID3D11Texture2D *) frame->data[0];
}
// init_output AddRefs the texture for its own ownership
return base.init_output(frame_texture, frame->width, frame->height);
}
private:
// Shared D3D11 color-conversion device
d3d_base_encode_device base;
// Owning reference to the AVFrame passed to set_frame()
frame_t hwframe;
};
// Standalone NVENC flavor of the D3D11 encode device. Drives the NVENC
// SDK wrapper (nvenc::nvenc_d3d11) directly instead of going through
// avcodec; conversion is still delegated to d3d_base_encode_device.
class d3d_nvenc_encode_device_t: public nvenc_encode_device_t {
public:
// Creates the D3D11 conversion device and the NVENC wrapper on the same
// adapter. Returns true on success (boolean convention, unlike the
// int/0-on-success init() used by the avcodec device).
bool
init_device(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
buffer_format = nvenc::nvenc_format_from_sunshine_format(pix_fmt);
if (buffer_format == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
BOOST_LOG(error) << "Unexpected pixel format for NvENC ["sv << from_pix_fmt(pix_fmt) << ']';
return false;
}
if (base.init(display, adapter_p, pix_fmt)) return false;
nvenc_d3d = std::make_unique<nvenc::nvenc_d3d11>(base.device.get());
// `nvenc` is an inherited member — presumably the interface pointer the
// base class encodes through; confirm against nvenc_encode_device_t.
nvenc = nvenc_d3d.get();
return true;
}
// Configures and creates the NVENC encoder session for a client stream,
// then wires the base device's output to NVENC's input texture.
// Returns true on success; false if init_device() was never run or any
// step fails.
bool
init_encoder(const ::video::config_t &client_config, const ::video::sunshine_colorspace_t &colorspace) override {
if (!nvenc_d3d) return false;
nvenc::nvenc_config nvenc_config;
// Maps the configured preset onto NVENC's range (offset by 11);
// defaults to 1 when unset — confirm mapping against config docs.
nvenc_config.quality_preset = config::video.nv.nv_preset ? (*config::video.nv.nv_preset - 11) : 1;
nvenc_config.h264_cavlc = (config::video.nv.nv_coder == NV_ENC_H264_ENTROPY_CODING_MODE_CAVLC);
auto nvenc_colorspace = nvenc::nvenc_colorspace_from_sunshine_colorspace(colorspace);
if (!nvenc_d3d->create_encoder(nvenc_config, client_config, nvenc_colorspace, buffer_format)) return false;
base.apply_colorspace(colorspace);
// init_output() returns 0 on success
return base.init_output(nvenc_d3d->get_input_texture(), client_config.width, client_config.height) == 0;
}
// Converts a captured image into NVENC's input texture (delegated).
int
convert(platf::img_t &img_base) override {
return base.convert(img_base);
}
private:
// Shared D3D11 color-conversion device
d3d_base_encode_device base;
// Owning NVENC SDK wrapper bound to base's D3D11 device
std::unique_ptr<nvenc::nvenc_d3d11> nvenc_d3d;
// NVENC buffer format matching the requested pix_fmt
NV_ENC_BUFFER_FORMAT buffer_format = NV_ENC_BUFFER_FORMAT_UNDEFINED;
};
bool
@@ -1464,26 +1525,32 @@ namespace platf::dxgi {
};
}
std::shared_ptr<platf::hwdevice_t>
display_vram_t::make_hwdevice(pix_fmt_e pix_fmt) {
std::unique_ptr<avcodec_encode_device_t>
display_vram_t::make_avcodec_encode_device(pix_fmt_e pix_fmt) {
if (pix_fmt != platf::pix_fmt_e::nv12 && pix_fmt != platf::pix_fmt_e::p010) {
BOOST_LOG(error) << "display_vram_t doesn't support pixel format ["sv << from_pix_fmt(pix_fmt) << ']';
return nullptr;
}
auto hwdevice = std::make_shared<hwdevice_t>();
auto device = std::make_unique<d3d_avcodec_encode_device_t>();
auto ret = hwdevice->init(
shared_from_this(),
adapter.get(),
pix_fmt);
auto ret = device->init(shared_from_this(), adapter.get(), pix_fmt);
if (ret) {
return nullptr;
}
return hwdevice;
return device;
}
// Factory for the standalone NVENC encode device.
// Returns nullptr when the device fails to initialize.
std::unique_ptr<nvenc_encode_device_t>
display_vram_t::make_nvenc_encode_device(pix_fmt_e pix_fmt) {
  auto encode_device = std::make_unique<d3d_nvenc_encode_device_t>();
  if (encode_device->init_device(shared_from_this(), adapter.get(), pix_fmt)) {
    return encode_device;
  }
  return nullptr;
}
int