diff --git a/README.md b/README.md index e9532d00..b3160832 100644 --- a/README.md +++ b/README.md @@ -20,11 +20,11 @@ Install the following: #### Common ``` -sudo apt install cmake gcc-10 g++-10 libssl-dev libavdevice-dev libboost-thread-dev libboost-filesystem-dev libboost-log-dev libpulse-dev libopus-dev +sudo apt install cmake gcc-10 g++-10 libssl-dev libavdevice-dev libboost-thread-dev libboost-filesystem-dev libboost-log-dev libpulse-dev libopus-dev libevdev-dev ``` #### X11 ``` -sudo apt install libxtst-dev libx11-dev libxrandr-dev libxfixes-dev libevdev-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev +sudo apt install libxtst-dev libx11-dev libxrandr-dev libxfixes-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev ``` #### KMS diff --git a/gen-deb.in b/gen-deb.in index 70da08d2..3cb64ea0 100755 --- a/gen-deb.in +++ b/gen-deb.in @@ -38,7 +38,7 @@ Architecture: amd64 Maintainer: @loki Priority: optional Version: 0.11.0 -Depends: libssl1.1, libavdevice58, libboost-thread1.67.0 | libboost-thread1.71.0, libboost-filesystem1.67.0 | libboost-filesystem1.71.0, libboost-log1.67.0 | libboost-log1.71.0, libpulse0, libopus0, libxcb-shm0, libxcb-xfixes0, libxtst6, libevdev2, libdrm2, libcap2 +Depends: libssl1.1, libavdevice58, libboost-thread1.67.0 | libboost-thread1.71.0 | libboost-thread1.74.0, libboost-filesystem1.67.0 | libboost-filesystem1.71.0 | libboost-filesystem1.74.0, libboost-log1.67.0 | libboost-log1.71.0 | libboost-log1.74.0, libpulse0, libopus0, libxcb-shm0, libxcb-xfixes0, libxtst6, libevdev2, libdrm2, libcap2 Description: Gamestream host for Moonlight EOF diff --git a/sunshine/config.cpp b/sunshine/config.cpp index 18083d9a..a35d5955 100644 --- a/sunshine/config.cpp +++ b/sunshine/config.cpp @@ -99,16 +99,22 @@ enum quality_e : int { _default = 0, speed, balanced, - //quality2, }; -enum rc_e : int { +enum class rc_hevc_e : int { constqp, /**< Constant QP mode */ vbr_latency, /**< Latency Constrained Variable Bitrate */ vbr_peak, /**< Peak Contrained Variable Bitrate */ cbr, /**< Constant bitrate mode */ }; +enum class rc_h264_e : int { + constqp, /**< Constant QP mode */ + cbr, /**< Constant bitrate mode */ + vbr_peak, /**< Peak Contrained Variable Bitrate */ + vbr_latency, /**< Latency Constrained Variable Bitrate */ +}; + enum coder_e : int { _auto = 0, cabac, @@ -120,15 +126,25 @@ std::optional quality_from_view(const std::string_view &quality) { if(quality == #x##sv) return x _CONVERT_(speed); _CONVERT_(balanced); - //_CONVERT_(quality2); if(quality == "default"sv) return _default; #undef _CONVERT_ return std::nullopt; } -std::optional rc_from_view(const std::string_view &rc) { +std::optional rc_h264_from_view(const std::string_view &rc) { #define _CONVERT_(x) \ - if(rc == #x##sv) return x + if(rc == #x##sv) return (int)rc_h264_e::x + _CONVERT_(constqp); + _CONVERT_(vbr_latency); + _CONVERT_(vbr_peak); + _CONVERT_(cbr); +#undef _CONVERT_ + return std::nullopt; +} + +std::optional rc_hevc_from_view(const std::string_view &rc) { +#define _CONVERT_(x) \ + if(rc == #x##sv) return (int)rc_hevc_e::x _CONVERT_(constqp); _CONVERT_(vbr_latency); _CONVERT_(vbr_peak); @@ -165,6 +181,7 @@ video_t video { { amd::balanced, std::nullopt, + std::nullopt, -1 }, // amd {}, // encoder @@ -659,8 +676,14 @@ void apply_config(std::unordered_map &&vars) { int_f(vars, "nv_coder", video.nv.coder, nv::coder_from_view); int_f(vars, "amd_quality", video.amd.quality, amd::quality_from_view); - int_f(vars, "amd_rc", video.amd.rc, amd::rc_from_view); + + std::string rc; + string_f(vars, "amd_rc", rc); int_f(vars, "amd_coder", video.amd.coder, amd::coder_from_view); + if(!rc.empty()) { + video.amd.rc_h264 = amd::rc_h264_from_view(rc); + video.amd.rc_hevc = amd::rc_hevc_from_view(rc); + } string_f(vars, "encoder", video.encoder); string_f(vars, "adapter_name", video.adapter_name); diff --git a/sunshine/config.h b/sunshine/config.h index 80d4052d..624e4225 100644 --- a/sunshine/config.h +++ b/sunshine/config.h @@ -29,7 +29,8 @@ struct video_t { struct { std::optional quality; - std::optional rc; + std::optional rc_h264; + std::optional rc_hevc; int coder; } amd; diff --git a/sunshine/platform/linux/cuda.cpp b/sunshine/platform/linux/cuda.cpp index b15104a5..c907ae6c 100644 --- a/sunshine/platform/linux/cuda.cpp +++ b/sunshine/platform/linux/cuda.cpp @@ -56,6 +56,10 @@ inline static int check(CUresult result, const std::string_view &sv) { return 0; } +void freeStream(CUstream stream) { + CU_CHECK_IGNORE(cdf->cuStreamDestroy(stream), "Couldn't destroy cuda stream"); +} + class img_t : public platf::img_t { public: tex_t tex; @@ -94,7 +98,8 @@ public: this->hwframe.reset(frame); this->frame = frame; - if(((AVHWFramesContext *)frame->hw_frames_ctx->data)->sw_format != AV_PIX_FMT_NV12) { + auto hwframe_ctx = (AVHWFramesContext *)frame->hw_frames_ctx->data; + if(hwframe_ctx->sw_format != AV_PIX_FMT_NV12) { BOOST_LOG(error) << "cuda::cuda_t doesn't support any format other than AV_PIX_FMT_NV12"sv; return -1; } @@ -105,6 +110,15 @@ public: return -1; } + auto cuda_ctx = (AVCUDADeviceContext *)hwframe_ctx->device_ctx->hwctx; + + stream = make_stream(); + if(!stream) { + return -1; + } + + cuda_ctx->stream = stream.get(); + auto sws_opt = sws_t::make(width, height, frame->width, frame->height, width * 4); if(!sws_opt) { return -1; @@ -142,13 +156,14 @@ public: return; } - sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture.linear, { frame->width, frame->height, 0, 0 }); + sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture.linear, stream.get(), { frame->width, frame->height, 0, 0 }); } cudaTextureObject_t tex_obj(const tex_t &tex) const { return linear_interpolation ? tex.texture.linear : tex.texture.point; } + stream_t stream; frame_t hwframe; int width, height; @@ -162,7 +177,7 @@ public: class cuda_ram_t : public cuda_t { public: int convert(platf::img_t &img) override { - return sws.load_ram(img, tex.array) || sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(tex)); + return sws.load_ram(img, tex.array) || sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(tex), stream.get()); } int set_frame(AVFrame *frame) { @@ -186,7 +201,7 @@ public: class cuda_vram_t : public cuda_t { public: int convert(platf::img_t &img) override { - return sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(((img_t *)&img)->tex)); + return sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(((img_t *)&img)->tex), stream.get()); } }; @@ -256,6 +271,28 @@ int init() { return 0; } +class ctx_t { +public: + ctx_t(NVFBC_SESSION_HANDLE handle) { + NVFBC_BIND_CONTEXT_PARAMS params { NVFBC_BIND_CONTEXT_PARAMS_VER }; + + if(func.nvFBCBindContext(handle, ¶ms)) { + BOOST_LOG(error) << "Couldn't bind NvFBC context to current thread: " << func.nvFBCGetLastErrorStr(handle); + } + + this->handle = handle; + } + + ~ctx_t() { + NVFBC_RELEASE_CONTEXT_PARAMS params { NVFBC_RELEASE_CONTEXT_PARAMS_VER }; + if(func.nvFBCReleaseContext(handle, ¶ms)) { + BOOST_LOG(error) << "Couldn't release NvFBC context from current thread: " << func.nvFBCGetLastErrorStr(handle); + } + } + + NVFBC_SESSION_HANDLE handle; +}; + class handle_t { enum flag_e { SESSION_HANDLE, @@ -347,24 +384,26 @@ public: return 0; } - ~handle_t() { + int reset() { if(!handle_flags[SESSION_HANDLE]) { - return; + return 0; } - if(handle_flags[SESSION_CAPTURE]) { - NVFBC_DESTROY_CAPTURE_SESSION_PARAMS params { NVFBC_DESTROY_CAPTURE_SESSION_PARAMS_VER }; - - if(func.nvFBCDestroyCaptureSession(handle, ¶ms)) { - BOOST_LOG(error) << "Couldn't destroy capture session: "sv << func.nvFBCGetLastErrorStr(handle); - } - } + stop(); NVFBC_DESTROY_HANDLE_PARAMS params { NVFBC_DESTROY_HANDLE_PARAMS_VER }; if(func.nvFBCDestroyHandle(handle, ¶ms)) { BOOST_LOG(error) << "Couldn't destroy session handle: "sv << func.nvFBCGetLastErrorStr(handle); } + + handle_flags[SESSION_HANDLE] = false; + + return 0; + } + + ~handle_t() { + reset(); } std::bitset handle_flags; @@ -380,6 +419,8 @@ public: return -1; } + ctx_t ctx { handle->handle }; + auto status_params = handle->status(); if(!status_params) { return -1; @@ -442,8 +483,9 @@ public: // Force display_t::capture to initialize handle_t::capture cursor_visible = !*cursor; + ctx_t ctx { handle.handle }; auto fg = util::fail_guard([&]() { - handle.stop(); + handle.reset(); }); while(img) { diff --git a/sunshine/platform/linux/cuda.cu b/sunshine/platform/linux/cuda.cu index 49f088f8..f69be730 100644 --- a/sunshine/platform/linux/cuda.cu +++ b/sunshine/platform/linux/cuda.cu @@ -110,6 +110,23 @@ void freeCudaPtr_t::operator()(void *ptr) { CU_CHECK_IGNORE(cudaFree(ptr), "Couldn't free cuda device pointer"); } +void freeCudaStream_t::operator()(cudaStream_t ptr) { + CU_CHECK_IGNORE(cudaStreamDestroy(ptr), "Couldn't free cuda stream"); +} + +stream_t make_stream(int flags) { + cudaStream_t stream; + + if(!flags) { + CU_CHECK_PTR(cudaStreamCreate(&stream), "Couldn't create cuda stream"); + } + else { + CU_CHECK_PTR(cudaStreamCreateWithFlags(&stream, flags), "Couldn't create cuda stream with flags"); + } + + return stream_t { stream }; +} + inline __device__ float3 bgra_to_rgb(uchar4 vec) { return make_float3((float)vec.z, (float)vec.y, (float)vec.x); } @@ -203,8 +220,8 @@ std::optional tex_t::make(int height, int pitch) { tex_t::tex_t() : array {}, texture { INVALID_TEXTURE } {} tex_t::tex_t(tex_t &&other) : array { other.array }, texture { other.texture } { - other.array = 0; - other.texture.point = INVALID_TEXTURE; + other.array = 0; + other.texture.point = INVALID_TEXTURE; other.texture.linear = INVALID_TEXTURE; } @@ -269,18 +286,18 @@ std::optional sws_t::make(int in_width, int in_height, int out_width, int return std::make_optional(in_width, in_height, out_width, out_height, pitch, props.maxThreadsPerMultiProcessor / props.maxBlocksPerMultiProcessor, std::move(ptr)); } -int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture) { - return convert(Y, UV, pitchY, pitchUV, texture, viewport); +int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream) { + return convert(Y, UV, pitchY, pitchUV, texture, stream, viewport); } -int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, const viewport_t &viewport) { +int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream, const viewport_t &viewport) { int threadsX = viewport.width / 2; int threadsY = viewport.height; dim3 block(threadsPerBlock); dim3 grid(div_align(threadsX, threadsPerBlock), threadsY); - RGBA_to_NV12<<>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (video::color_t *)color_matrix.get()); + RGBA_to_NV12<<>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (video::color_t *)color_matrix.get()); return CU_CHECK_IGNORE(cudaGetLastError(), "RGBA_to_NV12 failed"); } diff --git a/sunshine/platform/linux/cuda.h b/sunshine/platform/linux/cuda.h index 5811379f..b175140d 100644 --- a/sunshine/platform/linux/cuda.h +++ b/sunshine/platform/linux/cuda.h @@ -1,16 +1,18 @@ #if !defined(SUNSHINE_PLATFORM_CUDA_H) && defined(SUNSHINE_BUILD_CUDA) #define SUNSHINE_PLATFORM_CUDA_H -#include #include #include +#include +#include namespace platf { - class hwdevice_t; - class img_t; -} +class hwdevice_t; +class img_t; +} // namespace platf namespace cuda { + namespace nvfbc { std::vector display_names(); } @@ -21,8 +23,10 @@ int init(); typedef struct cudaArray *cudaArray_t; #if !defined(__CUDACC__) +typedef struct CUstream_st *cudaStream_t; typedef unsigned long long cudaTextureObject_t; #else /* defined(__CUDACC__) */ +typedef __location__(device_builtin) struct CUstream_st *cudaStream_t; typedef __location__(device_builtin) unsigned long long cudaTextureObject_t; #endif /* !defined(__CUDACC__) */ @@ -33,7 +37,15 @@ public: void operator()(void *ptr); }; -using ptr_t = std::unique_ptr; +class freeCudaStream_t { +public: + void operator()(cudaStream_t ptr); +}; + +using ptr_t = std::unique_ptr; +using stream_t = std::unique_ptr; + +stream_t make_stream(int flags = 0); struct viewport_t { int width, height; @@ -75,8 +87,8 @@ public: static std::optional make(int in_width, int in_height, int out_width, int out_height, int pitch); // Converts loaded image into a CUDevicePtr - int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture); - int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, const viewport_t &viewport); + int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream); + int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream, const viewport_t &viewport); void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range); diff --git a/sunshine/platform/linux/input.cpp b/sunshine/platform/linux/input.cpp index a0d18578..fbdc8133 100644 --- a/sunshine/platform/linux/input.cpp +++ b/sunshine/platform/linux/input.cpp @@ -696,7 +696,13 @@ public: }; inline void rumbleIterate(std::vector &effects, std::vector &polls, std::chrono::milliseconds to) { - auto res = poll(&polls.data()->el, polls.size(), to.count()); + std::vector polls_tmp; + polls_tmp.reserve(polls.size()); + for(auto &poll : polls) { + polls_tmp.emplace_back(poll.el); + } + + auto res = poll(polls_tmp.data(), polls.size(), to.count()); // If timed out if(!res) { @@ -871,7 +877,7 @@ void broadcastRumble(safe::queue_t &rumble_queue_queue) { } if(polls.empty()) { - std::this_thread::sleep_for(50ms); + std::this_thread::sleep_for(250ms); } else { rumbleIterate(effects, polls, 100ms); diff --git a/sunshine/platform/linux/misc.cpp b/sunshine/platform/linux/misc.cpp index dd114ec4..6d7643c0 100644 --- a/sunshine/platform/linux/misc.cpp +++ b/sunshine/platform/linux/misc.cpp @@ -264,13 +264,11 @@ std::unique_ptr init() { #endif #ifdef SUNSHINE_BUILD_CUDA if(verify_nvfbc()) { - BOOST_LOG(info) << "Using NvFBC for screencasting"sv; sources[source::NVFBC] = true; } #endif #ifdef SUNSHINE_BUILD_WAYLAND if(verify_wl()) { - BOOST_LOG(info) << "Using Wayland for screencasting"sv; sources[source::WAYLAND] = true; } #endif @@ -282,13 +280,11 @@ std::unique_ptr init() { display_cursor = false; } - BOOST_LOG(info) << "Using KMS for screencasting"sv; sources[source::KMS] = true; } #endif #ifdef SUNSHINE_BUILD_X11 if(verify_x11()) { - BOOST_LOG(info) << "Using X11 for screencasting"sv; sources[source::X11] = true; } #endif diff --git a/sunshine/video.cpp b/sunshine/video.cpp index f4a4c523..84038a6f 100644 --- a/sunshine/video.cpp +++ b/sunshine/video.cpp @@ -456,19 +456,19 @@ static encoder_t amdvce { { "gops_per_idr"s, 30 }, { "usage"s, "ultralowlatency"s }, { "quality"s, &config::video.amd.quality }, - { "rc"s, &config::video.amd.rc }, + { "rc"s, &config::video.amd.rc_hevc }, }, - std::make_optional({ "qp"s, &config::video.qp }), + std::make_optional({ "qp_p"s, &config::video.qp }), "hevc_amf"s, }, { { { "usage"s, "ultralowlatency"s }, { "quality"s, &config::video.amd.quality }, - { "rc"s, &config::video.amd.rc }, + { "rc"s, &config::video.amd.rc_h264 }, { "log_to_dbg"s, "1"s }, }, - std::make_optional({ "qp"s, &config::video.qp }), + std::make_optional({ "qp_p"s, &config::video.qp }), "h264_amf"s, }, DEFAULT,