Resolve merge conflicts

This commit is contained in:
loki-47-6F-64 2021-10-01 11:30:12 +02:00
commit 997095ce39
10 changed files with 144 additions and 47 deletions

View file

@ -20,11 +20,11 @@ Install the following:
#### Common
```
sudo apt install cmake gcc-10 g++-10 libssl-dev libavdevice-dev libboost-thread-dev libboost-filesystem-dev libboost-log-dev libpulse-dev libopus-dev
sudo apt install cmake gcc-10 g++-10 libssl-dev libavdevice-dev libboost-thread-dev libboost-filesystem-dev libboost-log-dev libpulse-dev libopus-dev libevdev-dev
```
#### X11
```
sudo apt install libxtst-dev libx11-dev libxrandr-dev libxfixes-dev libevdev-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev
sudo apt install libxtst-dev libx11-dev libxrandr-dev libxfixes-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev
```
#### KMS

View file

@ -38,7 +38,7 @@ Architecture: amd64
Maintainer: @loki
Priority: optional
Version: 0.11.0
Depends: libssl1.1, libavdevice58, libboost-thread1.67.0 | libboost-thread1.71.0, libboost-filesystem1.67.0 | libboost-filesystem1.71.0, libboost-log1.67.0 | libboost-log1.71.0, libpulse0, libopus0, libxcb-shm0, libxcb-xfixes0, libxtst6, libevdev2, libdrm2, libcap2
Depends: libssl1.1, libavdevice58, libboost-thread1.67.0 | libboost-thread1.71.0 | libboost-thread1.74.0, libboost-filesystem1.67.0 | libboost-filesystem1.71.0 | libboost-filesystem1.74.0, libboost-log1.67.0 | libboost-log1.71.0 | libboost-log1.74.0, libpulse0, libopus0, libxcb-shm0, libxcb-xfixes0, libxtst6, libevdev2, libdrm2, libcap2
Description: Gamestream host for Moonlight
EOF

View file

@ -99,16 +99,22 @@ enum quality_e : int {
_default = 0,
speed,
balanced,
//quality2,
};
enum rc_e : int {
enum class rc_hevc_e : int {
constqp, /**< Constant QP mode */
vbr_latency, /**< Latency Constrained Variable Bitrate */
vbr_peak, /**< Peak Contrained Variable Bitrate */
cbr, /**< Constant bitrate mode */
};
enum class rc_h264_e : int {
constqp, /**< Constant QP mode */
cbr, /**< Constant bitrate mode */
vbr_peak, /**< Peak Contrained Variable Bitrate */
vbr_latency, /**< Latency Constrained Variable Bitrate */
};
enum coder_e : int {
_auto = 0,
cabac,
@ -120,15 +126,25 @@ std::optional<quality_e> quality_from_view(const std::string_view &quality) {
if(quality == #x##sv) return x
_CONVERT_(speed);
_CONVERT_(balanced);
//_CONVERT_(quality2);
if(quality == "default"sv) return _default;
#undef _CONVERT_
return std::nullopt;
}
std::optional<rc_e> rc_from_view(const std::string_view &rc) {
std::optional<int> rc_h264_from_view(const std::string_view &rc) {
#define _CONVERT_(x) \
if(rc == #x##sv) return x
if(rc == #x##sv) return (int)rc_h264_e::x
_CONVERT_(constqp);
_CONVERT_(vbr_latency);
_CONVERT_(vbr_peak);
_CONVERT_(cbr);
#undef _CONVERT_
return std::nullopt;
}
std::optional<int> rc_hevc_from_view(const std::string_view &rc) {
#define _CONVERT_(x) \
if(rc == #x##sv) return (int)rc_hevc_e::x
_CONVERT_(constqp);
_CONVERT_(vbr_latency);
_CONVERT_(vbr_peak);
@ -165,6 +181,7 @@ video_t video {
{
amd::balanced,
std::nullopt,
std::nullopt,
-1 }, // amd
{}, // encoder
@ -659,8 +676,14 @@ void apply_config(std::unordered_map<std::string, std::string> &&vars) {
int_f(vars, "nv_coder", video.nv.coder, nv::coder_from_view);
int_f(vars, "amd_quality", video.amd.quality, amd::quality_from_view);
int_f(vars, "amd_rc", video.amd.rc, amd::rc_from_view);
std::string rc;
string_f(vars, "amd_rc", rc);
int_f(vars, "amd_coder", video.amd.coder, amd::coder_from_view);
if(!rc.empty()) {
video.amd.rc_h264 = amd::rc_h264_from_view(rc);
video.amd.rc_hevc = amd::rc_hevc_from_view(rc);
}
string_f(vars, "encoder", video.encoder);
string_f(vars, "adapter_name", video.adapter_name);

View file

@ -29,7 +29,8 @@ struct video_t {
struct {
std::optional<int> quality;
std::optional<int> rc;
std::optional<int> rc_h264;
std::optional<int> rc_hevc;
int coder;
} amd;

View file

@ -56,6 +56,10 @@ inline static int check(CUresult result, const std::string_view &sv) {
return 0;
}
void freeStream(CUstream stream) {
CU_CHECK_IGNORE(cdf->cuStreamDestroy(stream), "Couldn't destroy cuda stream");
}
class img_t : public platf::img_t {
public:
tex_t tex;
@ -94,7 +98,8 @@ public:
this->hwframe.reset(frame);
this->frame = frame;
if(((AVHWFramesContext *)frame->hw_frames_ctx->data)->sw_format != AV_PIX_FMT_NV12) {
auto hwframe_ctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
if(hwframe_ctx->sw_format != AV_PIX_FMT_NV12) {
BOOST_LOG(error) << "cuda::cuda_t doesn't support any format other than AV_PIX_FMT_NV12"sv;
return -1;
}
@ -105,6 +110,15 @@ public:
return -1;
}
auto cuda_ctx = (AVCUDADeviceContext *)hwframe_ctx->device_ctx->hwctx;
stream = make_stream();
if(!stream) {
return -1;
}
cuda_ctx->stream = stream.get();
auto sws_opt = sws_t::make(width, height, frame->width, frame->height, width * 4);
if(!sws_opt) {
return -1;
@ -142,13 +156,14 @@ public:
return;
}
sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture.linear, { frame->width, frame->height, 0, 0 });
sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture.linear, stream.get(), { frame->width, frame->height, 0, 0 });
}
cudaTextureObject_t tex_obj(const tex_t &tex) const {
return linear_interpolation ? tex.texture.linear : tex.texture.point;
}
stream_t stream;
frame_t hwframe;
int width, height;
@ -162,7 +177,7 @@ public:
class cuda_ram_t : public cuda_t {
public:
int convert(platf::img_t &img) override {
return sws.load_ram(img, tex.array) || sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(tex));
return sws.load_ram(img, tex.array) || sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(tex), stream.get());
}
int set_frame(AVFrame *frame) {
@ -186,7 +201,7 @@ public:
class cuda_vram_t : public cuda_t {
public:
int convert(platf::img_t &img) override {
return sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(((img_t *)&img)->tex));
return sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(((img_t *)&img)->tex), stream.get());
}
};
@ -256,6 +271,28 @@ int init() {
return 0;
}
class ctx_t {
public:
ctx_t(NVFBC_SESSION_HANDLE handle) {
NVFBC_BIND_CONTEXT_PARAMS params { NVFBC_BIND_CONTEXT_PARAMS_VER };
if(func.nvFBCBindContext(handle, &params)) {
BOOST_LOG(error) << "Couldn't bind NvFBC context to current thread: " << func.nvFBCGetLastErrorStr(handle);
}
this->handle = handle;
}
~ctx_t() {
NVFBC_RELEASE_CONTEXT_PARAMS params { NVFBC_RELEASE_CONTEXT_PARAMS_VER };
if(func.nvFBCReleaseContext(handle, &params)) {
BOOST_LOG(error) << "Couldn't release NvFBC context from current thread: " << func.nvFBCGetLastErrorStr(handle);
}
}
NVFBC_SESSION_HANDLE handle;
};
class handle_t {
enum flag_e {
SESSION_HANDLE,
@ -347,24 +384,26 @@ public:
return 0;
}
~handle_t() {
int reset() {
if(!handle_flags[SESSION_HANDLE]) {
return;
return 0;
}
if(handle_flags[SESSION_CAPTURE]) {
NVFBC_DESTROY_CAPTURE_SESSION_PARAMS params { NVFBC_DESTROY_CAPTURE_SESSION_PARAMS_VER };
if(func.nvFBCDestroyCaptureSession(handle, &params)) {
BOOST_LOG(error) << "Couldn't destroy capture session: "sv << func.nvFBCGetLastErrorStr(handle);
}
}
stop();
NVFBC_DESTROY_HANDLE_PARAMS params { NVFBC_DESTROY_HANDLE_PARAMS_VER };
if(func.nvFBCDestroyHandle(handle, &params)) {
BOOST_LOG(error) << "Couldn't destroy session handle: "sv << func.nvFBCGetLastErrorStr(handle);
}
handle_flags[SESSION_HANDLE] = false;
return 0;
}
~handle_t() {
reset();
}
std::bitset<MAX_FLAGS> handle_flags;
@ -380,6 +419,8 @@ public:
return -1;
}
ctx_t ctx { handle->handle };
auto status_params = handle->status();
if(!status_params) {
return -1;
@ -442,8 +483,9 @@ public:
// Force display_t::capture to initialize handle_t::capture
cursor_visible = !*cursor;
ctx_t ctx { handle.handle };
auto fg = util::fail_guard([&]() {
handle.stop();
handle.reset();
});
while(img) {

View file

@ -110,6 +110,23 @@ void freeCudaPtr_t::operator()(void *ptr) {
CU_CHECK_IGNORE(cudaFree(ptr), "Couldn't free cuda device pointer");
}
void freeCudaStream_t::operator()(cudaStream_t ptr) {
CU_CHECK_IGNORE(cudaStreamDestroy(ptr), "Couldn't free cuda stream");
}
stream_t make_stream(int flags) {
cudaStream_t stream;
if(!flags) {
CU_CHECK_PTR(cudaStreamCreate(&stream), "Couldn't create cuda stream");
}
else {
CU_CHECK_PTR(cudaStreamCreateWithFlags(&stream, flags), "Couldn't create cuda stream with flags");
}
return stream_t { stream };
}
inline __device__ float3 bgra_to_rgb(uchar4 vec) {
return make_float3((float)vec.z, (float)vec.y, (float)vec.x);
}
@ -203,8 +220,8 @@ std::optional<tex_t> tex_t::make(int height, int pitch) {
tex_t::tex_t() : array {}, texture { INVALID_TEXTURE } {}
tex_t::tex_t(tex_t &&other) : array { other.array }, texture { other.texture } {
other.array = 0;
other.texture.point = INVALID_TEXTURE;
other.array = 0;
other.texture.point = INVALID_TEXTURE;
other.texture.linear = INVALID_TEXTURE;
}
@ -269,18 +286,18 @@ std::optional<sws_t> sws_t::make(int in_width, int in_height, int out_width, int
return std::make_optional<sws_t>(in_width, in_height, out_width, out_height, pitch, props.maxThreadsPerMultiProcessor / props.maxBlocksPerMultiProcessor, std::move(ptr));
}
int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture) {
return convert(Y, UV, pitchY, pitchUV, texture, viewport);
int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream) {
return convert(Y, UV, pitchY, pitchUV, texture, stream, viewport);
}
int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, const viewport_t &viewport) {
int sws_t::convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream, const viewport_t &viewport) {
int threadsX = viewport.width / 2;
int threadsY = viewport.height;
dim3 block(threadsPerBlock);
dim3 grid(div_align(threadsX, threadsPerBlock), threadsY);
RGBA_to_NV12<<<grid, block>>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (video::color_t *)color_matrix.get());
RGBA_to_NV12<<<grid, block, 0, stream>>>(texture, Y, UV, pitchY, pitchUV, scale, viewport, (video::color_t *)color_matrix.get());
return CU_CHECK_IGNORE(cudaGetLastError(), "RGBA_to_NV12 failed");
}

View file

@ -1,16 +1,18 @@
#if !defined(SUNSHINE_PLATFORM_CUDA_H) && defined(SUNSHINE_BUILD_CUDA)
#define SUNSHINE_PLATFORM_CUDA_H
#include <vector>
#include <memory>
#include <optional>
#include <string>
#include <vector>
namespace platf {
class hwdevice_t;
class img_t;
}
class hwdevice_t;
class img_t;
} // namespace platf
namespace cuda {
namespace nvfbc {
std::vector<std::string> display_names();
}
@ -21,8 +23,10 @@ int init();
typedef struct cudaArray *cudaArray_t;
#if !defined(__CUDACC__)
typedef struct CUstream_st *cudaStream_t;
typedef unsigned long long cudaTextureObject_t;
#else /* defined(__CUDACC__) */
typedef __location__(device_builtin) struct CUstream_st *cudaStream_t;
typedef __location__(device_builtin) unsigned long long cudaTextureObject_t;
#endif /* !defined(__CUDACC__) */
@ -33,7 +37,15 @@ public:
void operator()(void *ptr);
};
using ptr_t = std::unique_ptr<void, freeCudaPtr_t>;
class freeCudaStream_t {
public:
void operator()(cudaStream_t ptr);
};
using ptr_t = std::unique_ptr<void, freeCudaPtr_t>;
using stream_t = std::unique_ptr<CUstream_st, freeCudaStream_t>;
stream_t make_stream(int flags = 0);
struct viewport_t {
int width, height;
@ -75,8 +87,8 @@ public:
static std::optional<sws_t> make(int in_width, int in_height, int out_width, int out_height, int pitch);
// Converts loaded image into a CUDevicePtr
int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture);
int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, const viewport_t &viewport);
int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream);
int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream, const viewport_t &viewport);
void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range);

View file

@ -696,7 +696,13 @@ public:
};
inline void rumbleIterate(std::vector<effect_t> &effects, std::vector<pollfd_t> &polls, std::chrono::milliseconds to) {
auto res = poll(&polls.data()->el, polls.size(), to.count());
std::vector<pollfd> polls_tmp;
polls_tmp.reserve(polls.size());
for(auto &poll : polls) {
polls_tmp.emplace_back(poll.el);
}
auto res = poll(polls_tmp.data(), polls.size(), to.count());
// If timed out
if(!res) {
@ -871,7 +877,7 @@ void broadcastRumble(safe::queue_t<mail_evdev_t> &rumble_queue_queue) {
}
if(polls.empty()) {
std::this_thread::sleep_for(50ms);
std::this_thread::sleep_for(250ms);
}
else {
rumbleIterate(effects, polls, 100ms);

View file

@ -264,13 +264,11 @@ std::unique_ptr<deinit_t> init() {
#endif
#ifdef SUNSHINE_BUILD_CUDA
if(verify_nvfbc()) {
BOOST_LOG(info) << "Using NvFBC for screencasting"sv;
sources[source::NVFBC] = true;
}
#endif
#ifdef SUNSHINE_BUILD_WAYLAND
if(verify_wl()) {
BOOST_LOG(info) << "Using Wayland for screencasting"sv;
sources[source::WAYLAND] = true;
}
#endif
@ -282,13 +280,11 @@ std::unique_ptr<deinit_t> init() {
display_cursor = false;
}
BOOST_LOG(info) << "Using KMS for screencasting"sv;
sources[source::KMS] = true;
}
#endif
#ifdef SUNSHINE_BUILD_X11
if(verify_x11()) {
BOOST_LOG(info) << "Using X11 for screencasting"sv;
sources[source::X11] = true;
}
#endif

View file

@ -456,19 +456,19 @@ static encoder_t amdvce {
{ "gops_per_idr"s, 30 },
{ "usage"s, "ultralowlatency"s },
{ "quality"s, &config::video.amd.quality },
{ "rc"s, &config::video.amd.rc },
{ "rc"s, &config::video.amd.rc_hevc },
},
std::make_optional<encoder_t::option_t>({ "qp"s, &config::video.qp }),
std::make_optional<encoder_t::option_t>({ "qp_p"s, &config::video.qp }),
"hevc_amf"s,
},
{
{
{ "usage"s, "ultralowlatency"s },
{ "quality"s, &config::video.amd.quality },
{ "rc"s, &config::video.amd.rc },
{ "rc"s, &config::video.amd.rc_h264 },
{ "log_to_dbg"s, "1"s },
},
std::make_optional<encoder_t::option_t>({ "qp"s, &config::video.qp }),
std::make_optional<encoder_t::option_t>({ "qp_p"s, &config::video.qp }),
"h264_amf"s,
},
DEFAULT,