Eliminate a redundant alloc+copy of each frame (#2852)
* Eliminate a redundant alloc+copy for each frame * Replace std::copy_n() with std::memcpy() for better code generation
This commit is contained in:
parent
18e7dfb190
commit
d57c66fa9c
2 changed files with 86 additions and 36 deletions
|
|
@ -674,11 +674,14 @@ namespace stream {
|
||||||
for (auto x = 0; x < nr_shards; ++x) {
|
for (auto x = 0; x < nr_shards; ++x) {
|
||||||
shards_p[x] = (uint8_t *) &shards[(x + 1) * prefixsize + x * blocksize];
|
shards_p[x] = (uint8_t *) &shards[(x + 1) * prefixsize + x * blocksize];
|
||||||
|
|
||||||
|
// GCC doesn't figure out that std::copy_n() can be replaced with memcpy() here
|
||||||
|
// and ends up compiling a horribly slow element-by-element copy loop, so we
|
||||||
|
// help it by using memcpy()/memset() directly.
|
||||||
auto copy_len = std::min<size_t>(blocksize, std::end(payload) - next);
|
auto copy_len = std::min<size_t>(blocksize, std::end(payload) - next);
|
||||||
std::copy_n(next, copy_len, shards_p[x]);
|
std::memcpy(shards_p[x], next, copy_len);
|
||||||
if (copy_len < blocksize) {
|
if (copy_len < blocksize) {
|
||||||
// Zero any additional space after the end of the payload
|
// Zero any additional space after the end of the payload
|
||||||
std::fill_n(shards_p[x] + copy_len, blocksize - copy_len, 0);
|
std::memset(shards_p[x] + copy_len, 0, blocksize - copy_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
next += copy_len;
|
next += copy_len;
|
||||||
|
|
@ -702,32 +705,50 @@ namespace stream {
|
||||||
}
|
}
|
||||||
} // namespace fec
|
} // namespace fec
|
||||||
|
|
||||||
template <class F>
|
/**
|
||||||
|
* @brief Combines two buffers and inserts new buffers at each slice boundary of the result.
|
||||||
|
* @param insert_size The number of bytes to insert.
|
||||||
|
* @param slice_size The number of bytes between insertions.
|
||||||
|
* @param data1 The first data buffer.
|
||||||
|
* @param data2 The second data buffer.
|
||||||
|
*/
|
||||||
std::vector<uint8_t>
|
std::vector<uint8_t>
|
||||||
insert(uint64_t insert_size, uint64_t slice_size, const std::string_view &data, F &&f) {
|
concat_and_insert(uint64_t insert_size, uint64_t slice_size, const std::string_view &data1, const std::string_view &data2) {
|
||||||
auto pad = data.size() % slice_size != 0;
|
auto data_size = data1.size() + data2.size();
|
||||||
auto elements = data.size() / slice_size + (pad ? 1 : 0);
|
auto pad = data_size % slice_size != 0;
|
||||||
|
auto elements = data_size / slice_size + (pad ? 1 : 0);
|
||||||
|
|
||||||
std::vector<uint8_t> result;
|
std::vector<uint8_t> result;
|
||||||
result.resize(elements * insert_size + data.size());
|
result.resize(elements * insert_size + data_size);
|
||||||
|
|
||||||
auto next = std::begin(data);
|
auto next = std::begin(data1);
|
||||||
for (auto x = 0; x < elements - 1; ++x) {
|
auto end = std::end(data1);
|
||||||
|
for (auto x = 0; x < elements; ++x) {
|
||||||
void *p = &result[x * (insert_size + slice_size)];
|
void *p = &result[x * (insert_size + slice_size)];
|
||||||
|
|
||||||
f(p, x, elements);
|
// For the last iteration, only copy to the end of the data
|
||||||
|
if (x == elements - 1) {
|
||||||
|
slice_size = data_size - (x * slice_size);
|
||||||
|
}
|
||||||
|
|
||||||
std::copy(next, next + slice_size, (char *) p + insert_size);
|
// Test if this slice will extend into the next buffer
|
||||||
next += slice_size;
|
if (next + slice_size > end) {
|
||||||
|
// Copy the first portion from the first buffer
|
||||||
|
auto copy_len = end - next;
|
||||||
|
std::copy(next, end, (char *) p + insert_size);
|
||||||
|
|
||||||
|
// Copy the remaining portion from the second buffer
|
||||||
|
next = std::begin(data2);
|
||||||
|
end = std::end(data2);
|
||||||
|
std::copy(next, next + (slice_size - copy_len), (char *) p + copy_len + insert_size);
|
||||||
|
next += slice_size - copy_len;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
std::copy(next, next + slice_size, (char *) p + insert_size);
|
||||||
|
next += slice_size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto x = elements - 1;
|
|
||||||
void *p = &result[x * (insert_size + slice_size)];
|
|
||||||
|
|
||||||
f(p, x, elements);
|
|
||||||
|
|
||||||
std::copy(next, std::end(data), (char *) p + insert_size);
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1314,24 +1335,13 @@ namespace stream {
|
||||||
frame_header.frame_processing_latency = 0;
|
frame_header.frame_processing_latency = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8_t> payload_new;
|
|
||||||
std::copy_n((uint8_t *) &frame_header, sizeof(frame_header), std::back_inserter(payload_new));
|
|
||||||
std::copy(std::begin(payload), std::end(payload), std::back_inserter(payload_new));
|
|
||||||
|
|
||||||
payload = { (char *) payload_new.data(), payload_new.size() };
|
|
||||||
|
|
||||||
// insert packet headers
|
|
||||||
auto blocksize = session->config.packetsize + MAX_RTP_HEADER_SIZE;
|
|
||||||
auto payload_blocksize = blocksize - sizeof(video_packet_raw_t);
|
|
||||||
|
|
||||||
auto fecPercentage = config::stream.fec_percentage;
|
auto fecPercentage = config::stream.fec_percentage;
|
||||||
|
|
||||||
payload_new = insert(sizeof(video_packet_raw_t), payload_blocksize,
|
// Insert space for packet headers
|
||||||
payload, [&](void *p, int fecIndex, int end) {
|
auto blocksize = session->config.packetsize + MAX_RTP_HEADER_SIZE;
|
||||||
video_packet_raw_t *video_packet = (video_packet_raw_t *) p;
|
auto payload_blocksize = blocksize - sizeof(video_packet_raw_t);
|
||||||
|
auto payload_new = concat_and_insert(sizeof(video_packet_raw_t), payload_blocksize,
|
||||||
video_packet->packet.flags = FLAG_CONTAINS_PIC_DATA;
|
std::string_view { (char *) &frame_header, sizeof(frame_header) }, payload);
|
||||||
});
|
|
||||||
|
|
||||||
payload = std::string_view { (char *) payload_new.data(), payload_new.size() };
|
payload = std::string_view { (char *) payload_new.data(), payload_new.size() };
|
||||||
|
|
||||||
|
|
@ -1422,10 +1432,10 @@ namespace stream {
|
||||||
inspect->packet.multiFecFlags = 0x10;
|
inspect->packet.multiFecFlags = 0x10;
|
||||||
inspect->packet.multiFecBlocks = (blockIndex << 4) | ((fec_blocks_needed - 1) << 6);
|
inspect->packet.multiFecBlocks = (blockIndex << 4) | ((fec_blocks_needed - 1) << 6);
|
||||||
|
|
||||||
|
inspect->packet.flags = FLAG_CONTAINS_PIC_DATA;
|
||||||
if (x == 0) {
|
if (x == 0) {
|
||||||
inspect->packet.flags |= FLAG_SOF;
|
inspect->packet.flags |= FLAG_SOF;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (x == packets - 1) {
|
if (x == packets - 1) {
|
||||||
inspect->packet.flags |= FLAG_EOF;
|
inspect->packet.flags |= FLAG_EOF;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
40
tests/unit/test_stream.cpp
Normal file
40
tests/unit/test_stream.cpp
Normal file
|
|
@ -0,0 +1,40 @@
|
||||||
|
/**
|
||||||
|
* @file tests/unit/test_stream.cpp
|
||||||
|
* @brief Test src/stream.*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <functional>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace stream {
|
||||||
|
std::vector<uint8_t>
|
||||||
|
concat_and_insert(uint64_t insert_size, uint64_t slice_size, const std::string_view &data1, const std::string_view &data2);
|
||||||
|
}
|
||||||
|
|
||||||
|
#include <tests/conftest.cpp>
|
||||||
|
|
||||||
|
TEST(ConcatAndInsertTests, ConcatNoInsertionTest) {
|
||||||
|
char b1[] = { 'a', 'b' };
|
||||||
|
char b2[] = { 'c', 'd', 'e' };
|
||||||
|
auto res = stream::concat_and_insert(0, 2, std::string_view { b1, sizeof(b1) }, std::string_view { b2, sizeof(b2) });
|
||||||
|
auto expected = std::vector<uint8_t> { 'a', 'b', 'c', 'd', 'e' };
|
||||||
|
ASSERT_EQ(res, expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(ConcatAndInsertTests, ConcatLargeStrideTest) {
|
||||||
|
char b1[] = { 'a', 'b' };
|
||||||
|
char b2[] = { 'c', 'd', 'e' };
|
||||||
|
auto res = stream::concat_and_insert(1, sizeof(b1) + sizeof(b2) + 1, std::string_view { b1, sizeof(b1) }, std::string_view { b2, sizeof(b2) });
|
||||||
|
auto expected = std::vector<uint8_t> { 0, 'a', 'b', 'c', 'd', 'e' };
|
||||||
|
ASSERT_EQ(res, expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(ConcatAndInsertTests, ConcatSmallStrideTest) {
|
||||||
|
char b1[] = { 'a', 'b' };
|
||||||
|
char b2[] = { 'c', 'd', 'e' };
|
||||||
|
auto res = stream::concat_and_insert(1, 1, std::string_view { b1, sizeof(b1) }, std::string_view { b2, sizeof(b2) });
|
||||||
|
auto expected = std::vector<uint8_t> { 0, 'a', 0, 'b', 0, 'c', 0, 'd', 0, 'e' };
|
||||||
|
ASSERT_EQ(res, expected);
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue