mirror of
https://github.com/yuzu-emu/yuzu-android.git
synced 2025-06-18 04:38:02 -05:00
video_core: NVDEC Implementation
This commit aims to implement the NVDEC (Nvidia Decoder) functionality, with video frame decoding being handled by the FFmpeg library. The process begins with Ioctl commands being sent to the NVDEC and VIC (Video Image Composer) emulated devices. These allocate the necessary GPU buffers for the frame data, along with providing information on the incoming video data. A Submit command then signals the GPU to process and decode the frame data. To decode the frame, the respective codec's header must be manually composed from the information provided by NVDEC, then sent with the raw frame data to the ffmpeg library. Currently, H264 and VP9 are supported, with VP9 having some minor artifacting issues related mainly to the reference frame composition in its uncompressed header. Async GPU is not properly implemented at the moment. Co-Authored-By: David <25727384+ogniK5377@users.noreply.github.com>
This commit is contained in:
114
src/video_core/command_classes/codecs/codec.cpp
Normal file
114
src/video_core/command_classes/codecs/codec.cpp
Normal file
@ -0,0 +1,114 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include "common/assert.h"
|
||||
#include "video_core/command_classes/codecs/codec.h"
|
||||
#include "video_core/command_classes/codecs/h264.h"
|
||||
#include "video_core/command_classes/codecs/vp9.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
||||
extern "C" {
|
||||
#include <libavutil/opt.h>
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
Codec::Codec(GPU& gpu_)
|
||||
: gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)),
|
||||
vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
|
||||
|
||||
Codec::~Codec() {
|
||||
if (!initialized) {
|
||||
return;
|
||||
}
|
||||
// Free libav memory
|
||||
avcodec_send_packet(av_codec_ctx, nullptr);
|
||||
avcodec_receive_frame(av_codec_ctx, av_frame);
|
||||
avcodec_flush_buffers(av_codec_ctx);
|
||||
|
||||
av_frame_unref(av_frame);
|
||||
av_free(av_frame);
|
||||
avcodec_close(av_codec_ctx);
|
||||
}
|
||||
|
||||
void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
|
||||
LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec));
|
||||
current_codec = codec;
|
||||
}
|
||||
|
||||
void Codec::StateWrite(u32 offset, u64 arguments) {
|
||||
u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64);
|
||||
std::memcpy(state_offset, &arguments, sizeof(u64));
|
||||
}
|
||||
|
||||
void Codec::Decode() {
|
||||
bool is_first_frame = false;
|
||||
|
||||
if (!initialized) {
|
||||
if (current_codec == NvdecCommon::VideoCodec::H264) {
|
||||
av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
|
||||
} else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
|
||||
av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9);
|
||||
} else {
|
||||
LOG_ERROR(Service_NVDRV, "Unknown video codec {}", static_cast<u32>(current_codec));
|
||||
return;
|
||||
}
|
||||
|
||||
av_codec_ctx = avcodec_alloc_context3(av_codec);
|
||||
av_frame = av_frame_alloc();
|
||||
av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
|
||||
|
||||
// TODO(ameerj): libavcodec gpu hw acceleration
|
||||
|
||||
const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
|
||||
if (av_error < 0) {
|
||||
LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
|
||||
av_frame_unref(av_frame);
|
||||
av_free(av_frame);
|
||||
avcodec_close(av_codec_ctx);
|
||||
return;
|
||||
}
|
||||
initialized = true;
|
||||
is_first_frame = true;
|
||||
}
|
||||
bool vp9_hidden_frame = false;
|
||||
|
||||
AVPacket packet{};
|
||||
av_init_packet(&packet);
|
||||
std::vector<u8> frame_data;
|
||||
|
||||
if (current_codec == NvdecCommon::VideoCodec::H264) {
|
||||
frame_data = h264_decoder->ComposeFrameHeader(state, is_first_frame);
|
||||
} else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
|
||||
frame_data = vp9_decoder->ComposeFrameHeader(state);
|
||||
vp9_hidden_frame = vp9_decoder->WasFrameHidden();
|
||||
}
|
||||
|
||||
packet.data = frame_data.data();
|
||||
packet.size = static_cast<int>(frame_data.size());
|
||||
|
||||
avcodec_send_packet(av_codec_ctx, &packet);
|
||||
|
||||
if (!vp9_hidden_frame) {
|
||||
// Only receive/store visible frames
|
||||
avcodec_receive_frame(av_codec_ctx, av_frame);
|
||||
}
|
||||
}
|
||||
|
||||
AVFrame* Codec::GetCurrentFrame() {
|
||||
return av_frame;
|
||||
}
|
||||
|
||||
const AVFrame* Codec::GetCurrentFrame() const {
|
||||
return av_frame;
|
||||
}
|
||||
|
||||
NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
|
||||
return current_codec;
|
||||
}
|
||||
|
||||
} // namespace Tegra
|
68
src/video_core/command_classes/codecs/codec.h
Normal file
68
src/video_core/command_classes/codecs/codec.h
Normal file
@ -0,0 +1,68 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/command_classes/nvdec_common.h"
|
||||
|
||||
extern "C" {
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#pragma GCC diagnostic ignored "-Wconversion"
|
||||
#endif
|
||||
#include <libavcodec/avcodec.h>
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class GPU;
|
||||
struct VicRegisters;
|
||||
|
||||
namespace Decoder {
|
||||
class H264;
|
||||
class VP9;
|
||||
} // namespace Decoder
|
||||
|
||||
class Codec {
|
||||
public:
|
||||
explicit Codec(GPU& gpu);
|
||||
~Codec();
|
||||
|
||||
/// Sets NVDEC video stream codec
|
||||
void SetTargetCodec(NvdecCommon::VideoCodec codec);
|
||||
|
||||
/// Populate NvdecRegisters state with argument value at the provided offset
|
||||
void StateWrite(u32 offset, u64 arguments);
|
||||
|
||||
/// Call decoders to construct headers, decode AVFrame with ffmpeg
|
||||
void Decode();
|
||||
|
||||
/// Returns most recently decoded frame
|
||||
AVFrame* GetCurrentFrame();
|
||||
const AVFrame* GetCurrentFrame() const;
|
||||
|
||||
/// Returns the value of current_codec
|
||||
NvdecCommon::VideoCodec GetCurrentCodec() const;
|
||||
|
||||
private:
|
||||
bool initialized{};
|
||||
NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
|
||||
|
||||
AVCodec* av_codec{nullptr};
|
||||
AVCodecContext* av_codec_ctx{nullptr};
|
||||
AVFrame* av_frame{nullptr};
|
||||
|
||||
GPU& gpu;
|
||||
std::unique_ptr<Decoder::H264> h264_decoder;
|
||||
std::unique_ptr<Decoder::VP9> vp9_decoder;
|
||||
|
||||
NvdecCommon::NvdecRegisters state{};
|
||||
};
|
||||
|
||||
} // namespace Tegra
|
276
src/video_core/command_classes/codecs/h264.cpp
Normal file
276
src/video_core/command_classes/codecs/h264.cpp
Normal file
@ -0,0 +1,276 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) Ryujinx Team and Contributors
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
|
||||
// associated documentation files (the "Software"), to deal in the Software without restriction,
|
||||
// including without limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||
// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or
|
||||
// substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
|
||||
// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
//
|
||||
|
||||
#include "common/bit_util.h"
|
||||
#include "video_core/command_classes/codecs/h264.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
||||
namespace Tegra::Decoder {
|
||||
H264::H264(GPU& gpu_) : gpu(gpu_) {}
|
||||
|
||||
H264::~H264() = default;
|
||||
|
||||
std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bool is_first_frame) {
|
||||
H264DecoderContext context{};
|
||||
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
|
||||
|
||||
const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff);
|
||||
if (!is_first_frame && frame_number != 0) {
|
||||
frame.resize(context.frame_data_size);
|
||||
|
||||
gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
|
||||
} else {
|
||||
/// Encode header
|
||||
H264BitWriter writer{};
|
||||
writer.WriteU(1, 24);
|
||||
writer.WriteU(0, 1);
|
||||
writer.WriteU(3, 2);
|
||||
writer.WriteU(7, 5);
|
||||
writer.WriteU(100, 8);
|
||||
writer.WriteU(0, 8);
|
||||
writer.WriteU(31, 8);
|
||||
writer.WriteUe(0);
|
||||
const s32 chroma_format_idc = (context.h264_parameter_set.flags >> 12) & 0x3;
|
||||
writer.WriteUe(chroma_format_idc);
|
||||
if (chroma_format_idc == 3) {
|
||||
writer.WriteBit(false);
|
||||
}
|
||||
|
||||
writer.WriteUe(0);
|
||||
writer.WriteUe(0);
|
||||
writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
|
||||
writer.WriteBit(false); // Scaling matrix present flag
|
||||
|
||||
const s32 order_cnt_type = static_cast<s32>((context.h264_parameter_set.flags >> 14) & 3);
|
||||
writer.WriteUe(static_cast<s32>((context.h264_parameter_set.flags >> 8) & 0xf));
|
||||
writer.WriteUe(order_cnt_type);
|
||||
if (order_cnt_type == 0) {
|
||||
writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt);
|
||||
} else if (order_cnt_type == 1) {
|
||||
writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
|
||||
|
||||
writer.WriteSe(0);
|
||||
writer.WriteSe(0);
|
||||
writer.WriteUe(0);
|
||||
}
|
||||
|
||||
const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units /
|
||||
(context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
|
||||
|
||||
writer.WriteUe(16);
|
||||
writer.WriteBit(false);
|
||||
writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
|
||||
writer.WriteUe(pic_height - 1);
|
||||
writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
|
||||
|
||||
if (!context.h264_parameter_set.frame_mbs_only_flag) {
|
||||
writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0);
|
||||
}
|
||||
|
||||
writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0);
|
||||
writer.WriteBit(false); // Frame cropping flag
|
||||
writer.WriteBit(false); // VUI parameter present flag
|
||||
|
||||
writer.End();
|
||||
|
||||
// H264 PPS
|
||||
writer.WriteU(1, 24);
|
||||
writer.WriteU(0, 1);
|
||||
writer.WriteU(3, 2);
|
||||
writer.WriteU(8, 5);
|
||||
|
||||
writer.WriteUe(0);
|
||||
writer.WriteUe(0);
|
||||
|
||||
writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag);
|
||||
writer.WriteBit(false);
|
||||
writer.WriteUe(0);
|
||||
writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
|
||||
writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
|
||||
writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0);
|
||||
writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2);
|
||||
s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f);
|
||||
pic_init_qp = (pic_init_qp << 26) >> 26;
|
||||
writer.WriteSe(pic_init_qp);
|
||||
writer.WriteSe(0);
|
||||
s32 chroma_qp_index_offset =
|
||||
static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f);
|
||||
chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27;
|
||||
|
||||
writer.WriteSe(chroma_qp_index_offset);
|
||||
writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0);
|
||||
writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0);
|
||||
writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0);
|
||||
writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
|
||||
|
||||
writer.WriteBit(true);
|
||||
|
||||
for (s32 index = 0; index < 6; index++) {
|
||||
writer.WriteBit(true);
|
||||
const auto matrix_x4 =
|
||||
std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end());
|
||||
writer.WriteScalingList(matrix_x4, index * 16, 16);
|
||||
}
|
||||
|
||||
if (context.h264_parameter_set.transform_8x8_mode_flag) {
|
||||
for (s32 index = 0; index < 2; index++) {
|
||||
writer.WriteBit(true);
|
||||
const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(),
|
||||
context.scaling_matrix_8.end());
|
||||
|
||||
writer.WriteScalingList(matrix_x8, index * 64, 64);
|
||||
}
|
||||
}
|
||||
|
||||
s32 chroma_qp_index_offset2 =
|
||||
static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f);
|
||||
chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27;
|
||||
|
||||
writer.WriteSe(chroma_qp_index_offset2);
|
||||
|
||||
writer.End();
|
||||
|
||||
const auto& encoded_header = writer.GetByteArray();
|
||||
frame.resize(encoded_header.size() + context.frame_data_size);
|
||||
std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
|
||||
|
||||
gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
|
||||
frame.data() + encoded_header.size(),
|
||||
context.frame_data_size);
|
||||
}
|
||||
|
||||
return frame;
|
||||
}
|
||||
|
||||
H264BitWriter::H264BitWriter() = default;
|
||||
|
||||
H264BitWriter::~H264BitWriter() = default;
|
||||
|
||||
void H264BitWriter::WriteU(s32 value, s32 value_sz) {
|
||||
WriteBits(value, value_sz);
|
||||
}
|
||||
|
||||
void H264BitWriter::WriteSe(s32 value) {
|
||||
WriteExpGolombCodedInt(value);
|
||||
}
|
||||
|
||||
void H264BitWriter::WriteUe(s32 value) {
|
||||
WriteExpGolombCodedUInt((u32)value);
|
||||
}
|
||||
|
||||
void H264BitWriter::End() {
|
||||
WriteBit(true);
|
||||
Flush();
|
||||
}
|
||||
|
||||
void H264BitWriter::WriteBit(bool state) {
|
||||
WriteBits(state ? 1 : 0, 1);
|
||||
}
|
||||
|
||||
void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) {
|
||||
std::vector<u8> scan(count);
|
||||
if (count == 16) {
|
||||
std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
|
||||
} else {
|
||||
std::memcpy(scan.data(), zig_zag_direct.data(), scan.size());
|
||||
}
|
||||
u8 last_scale = 8;
|
||||
|
||||
for (s32 index = 0; index < count; index++) {
|
||||
const u8 value = list[start + scan[index]];
|
||||
const s32 delta_scale = static_cast<s32>(value - last_scale);
|
||||
|
||||
WriteSe(delta_scale);
|
||||
|
||||
last_scale = value;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<u8>& H264BitWriter::GetByteArray() {
|
||||
return byte_array;
|
||||
}
|
||||
|
||||
const std::vector<u8>& H264BitWriter::GetByteArray() const {
|
||||
return byte_array;
|
||||
}
|
||||
|
||||
void H264BitWriter::WriteBits(s32 value, s32 bit_count) {
|
||||
s32 value_pos = 0;
|
||||
|
||||
s32 remaining = bit_count;
|
||||
|
||||
while (remaining > 0) {
|
||||
s32 copy_size = remaining;
|
||||
|
||||
const s32 free_bits = GetFreeBufferBits();
|
||||
|
||||
if (copy_size > free_bits) {
|
||||
copy_size = free_bits;
|
||||
}
|
||||
|
||||
const s32 mask = (1 << copy_size) - 1;
|
||||
|
||||
const s32 src_shift = (bit_count - value_pos) - copy_size;
|
||||
const s32 dst_shift = (buffer_size - buffer_pos) - copy_size;
|
||||
|
||||
buffer |= ((value >> src_shift) & mask) << dst_shift;
|
||||
|
||||
value_pos += copy_size;
|
||||
buffer_pos += copy_size;
|
||||
remaining -= copy_size;
|
||||
}
|
||||
}
|
||||
|
||||
void H264BitWriter::WriteExpGolombCodedInt(s32 value) {
|
||||
const s32 sign = value <= 0 ? 0 : 1;
|
||||
if (value < 0) {
|
||||
value = -value;
|
||||
}
|
||||
value = (value << 1) - sign;
|
||||
WriteExpGolombCodedUInt(value);
|
||||
}
|
||||
|
||||
void H264BitWriter::WriteExpGolombCodedUInt(u32 value) {
|
||||
const s32 size = 32 - Common::CountLeadingZeroes32(static_cast<s32>(value + 1));
|
||||
WriteBits(1, size);
|
||||
|
||||
value -= (1U << (size - 1)) - 1;
|
||||
WriteBits(static_cast<s32>(value), size - 1);
|
||||
}
|
||||
|
||||
s32 H264BitWriter::GetFreeBufferBits() {
|
||||
if (buffer_pos == buffer_size) {
|
||||
Flush();
|
||||
}
|
||||
|
||||
return buffer_size - buffer_pos;
|
||||
}
|
||||
|
||||
void H264BitWriter::Flush() {
|
||||
if (buffer_pos == 0) {
|
||||
return;
|
||||
}
|
||||
byte_array.push_back(static_cast<u8>(buffer));
|
||||
|
||||
buffer = 0;
|
||||
buffer_pos = 0;
|
||||
}
|
||||
} // namespace Tegra::Decoder
|
130
src/video_core/command_classes/codecs/h264.h
Normal file
130
src/video_core/command_classes/codecs/h264.h
Normal file
@ -0,0 +1,130 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) Ryujinx Team and Contributors
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
|
||||
// associated documentation files (the "Software"), to deal in the Software without restriction,
|
||||
// including without limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||
// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or
|
||||
// substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
|
||||
// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/command_classes/nvdec_common.h"
|
||||
|
||||
namespace Tegra {
|
||||
class GPU;
|
||||
namespace Decoder {
|
||||
|
||||
class H264BitWriter {
|
||||
public:
|
||||
H264BitWriter();
|
||||
~H264BitWriter();
|
||||
|
||||
/// The following Write methods are based on clause 9.1 in the H.264 specification.
|
||||
/// WriteSe and WriteUe write in the Exp-Golomb-coded syntax
|
||||
void WriteU(s32 value, s32 value_sz);
|
||||
void WriteSe(s32 value);
|
||||
void WriteUe(s32 value);
|
||||
|
||||
/// Finalize the bitstream
|
||||
void End();
|
||||
|
||||
/// append a bit to the stream, equivalent value to the state parameter
|
||||
void WriteBit(bool state);
|
||||
|
||||
/// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
|
||||
/// Writes the scaling matrices of the sream
|
||||
void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);
|
||||
|
||||
/// Return the bitstream as a vector.
|
||||
std::vector<u8>& GetByteArray();
|
||||
const std::vector<u8>& GetByteArray() const;
|
||||
|
||||
private:
|
||||
// ZigZag LUTs from libavcodec.
|
||||
static constexpr std::array<u8, 64> zig_zag_direct{
|
||||
0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48,
|
||||
41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23,
|
||||
30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
|
||||
};
|
||||
|
||||
static constexpr std::array<u8, 16> zig_zag_scan{
|
||||
0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
|
||||
1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
|
||||
};
|
||||
|
||||
void WriteBits(s32 value, s32 bit_count);
|
||||
void WriteExpGolombCodedInt(s32 value);
|
||||
void WriteExpGolombCodedUInt(u32 value);
|
||||
s32 GetFreeBufferBits();
|
||||
void Flush();
|
||||
|
||||
s32 buffer_size{8};
|
||||
|
||||
s32 buffer{};
|
||||
s32 buffer_pos{};
|
||||
std::vector<u8> byte_array;
|
||||
};
|
||||
|
||||
class H264 {
|
||||
public:
|
||||
explicit H264(GPU& gpu);
|
||||
~H264();
|
||||
|
||||
/// Compose the H264 header of the frame for FFmpeg decoding
|
||||
std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
|
||||
bool is_first_frame = false);
|
||||
|
||||
private:
|
||||
struct H264ParameterSet {
|
||||
u32 log2_max_pic_order_cnt{};
|
||||
u32 delta_pic_order_always_zero_flag{};
|
||||
u32 frame_mbs_only_flag{};
|
||||
u32 pic_width_in_mbs{};
|
||||
u32 pic_height_in_map_units{};
|
||||
INSERT_PADDING_WORDS(1);
|
||||
u32 entropy_coding_mode_flag{};
|
||||
u32 bottom_field_pic_order_flag{};
|
||||
u32 num_refidx_l0_default_active{};
|
||||
u32 num_refidx_l1_default_active{};
|
||||
u32 deblocking_filter_control_flag{};
|
||||
u32 redundant_pic_count_flag{};
|
||||
u32 transform_8x8_mode_flag{};
|
||||
INSERT_PADDING_WORDS(9);
|
||||
u64 flags{};
|
||||
u32 frame_number{};
|
||||
u32 frame_number2{};
|
||||
};
|
||||
static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size");
|
||||
|
||||
struct H264DecoderContext {
|
||||
INSERT_PADDING_BYTES(0x48);
|
||||
u32 frame_data_size{};
|
||||
INSERT_PADDING_BYTES(0xc);
|
||||
H264ParameterSet h264_parameter_set{};
|
||||
INSERT_PADDING_BYTES(0x100);
|
||||
std::array<u8, 0x60> scaling_matrix_4;
|
||||
std::array<u8, 0x80> scaling_matrix_8;
|
||||
};
|
||||
static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size");
|
||||
|
||||
std::vector<u8> frame;
|
||||
GPU& gpu;
|
||||
};
|
||||
|
||||
} // namespace Decoder
|
||||
} // namespace Tegra
|
1010
src/video_core/command_classes/codecs/vp9.cpp
Normal file
1010
src/video_core/command_classes/codecs/vp9.cpp
Normal file
File diff suppressed because it is too large
Load Diff
216
src/video_core/command_classes/codecs/vp9.h
Normal file
216
src/video_core/command_classes/codecs/vp9.h
Normal file
@ -0,0 +1,216 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/stream.h"
|
||||
#include "video_core/command_classes/codecs/vp9_types.h"
|
||||
#include "video_core/command_classes/nvdec_common.h"
|
||||
|
||||
namespace Tegra {
|
||||
class GPU;
|
||||
enum class FrameType { KeyFrame = 0, InterFrame = 1 };
|
||||
namespace Decoder {
|
||||
|
||||
/// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the
|
||||
/// VP9 header bitstreams.
|
||||
|
||||
class VpxRangeEncoder {
|
||||
public:
|
||||
VpxRangeEncoder();
|
||||
~VpxRangeEncoder();
|
||||
|
||||
/// Writes the rightmost value_size bits from value into the stream
|
||||
void Write(s32 value, s32 value_size);
|
||||
|
||||
/// Writes a single bit with half probability
|
||||
void Write(bool bit);
|
||||
|
||||
/// Writes a bit to the base_stream encoded with probability
|
||||
void Write(bool bit, s32 probability);
|
||||
|
||||
/// Signal the end of the bitstream
|
||||
void End();
|
||||
|
||||
std::vector<u8>& GetBuffer() {
|
||||
return base_stream.GetBuffer();
|
||||
}
|
||||
|
||||
const std::vector<u8>& GetBuffer() const {
|
||||
return base_stream.GetBuffer();
|
||||
}
|
||||
|
||||
private:
|
||||
u8 PeekByte();
|
||||
Common::Stream base_stream{};
|
||||
u32 low_value{};
|
||||
u32 range{0xff};
|
||||
s32 count{-24};
|
||||
s32 half_probability{128};
|
||||
static constexpr std::array<s32, 256> norm_lut{
|
||||
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
};
|
||||
|
||||
class VpxBitStreamWriter {
|
||||
public:
|
||||
VpxBitStreamWriter();
|
||||
~VpxBitStreamWriter();
|
||||
|
||||
/// Write an unsigned integer value
|
||||
void WriteU(u32 value, u32 value_size);
|
||||
|
||||
/// Write a signed integer value
|
||||
void WriteS(s32 value, u32 value_size);
|
||||
|
||||
/// Based on 6.2.10 of VP9 Spec, writes a delta coded value
|
||||
void WriteDeltaQ(u32 value);
|
||||
|
||||
/// Write a single bit.
|
||||
void WriteBit(bool state);
|
||||
|
||||
/// Pushes current buffer into buffer_array, resets buffer
|
||||
void Flush();
|
||||
|
||||
/// Returns byte_array
|
||||
std::vector<u8>& GetByteArray();
|
||||
|
||||
/// Returns const byte_array
|
||||
const std::vector<u8>& GetByteArray() const;
|
||||
|
||||
private:
|
||||
/// Write bit_count bits from value into buffer
|
||||
void WriteBits(u32 value, u32 bit_count);
|
||||
|
||||
/// Gets next available position in buffer, invokes Flush() if buffer is full
|
||||
s32 GetFreeBufferBits();
|
||||
|
||||
s32 buffer_size{8};
|
||||
|
||||
s32 buffer{};
|
||||
s32 buffer_pos{};
|
||||
std::vector<u8> byte_array;
|
||||
};
|
||||
|
||||
class VP9 {
|
||||
public:
|
||||
explicit VP9(GPU& gpu);
|
||||
~VP9();
|
||||
|
||||
/// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec
|
||||
/// documentation
|
||||
std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state);
|
||||
|
||||
/// Returns true if the most recent frame was a hidden frame.
|
||||
bool WasFrameHidden() const {
|
||||
return hidden;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Generates compressed header probability updates in the bitstream writer
|
||||
template <typename T, std::size_t N>
|
||||
void WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
|
||||
const std::array<T, N>& old_prob);
|
||||
|
||||
/// Generates compressed header probability updates in the bitstream writer
|
||||
/// If probs are not equal, WriteProbabilityDelta is invoked
|
||||
void WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
|
||||
|
||||
/// Generates compressed header probability deltas in the bitstream writer
|
||||
void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
|
||||
|
||||
/// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification
|
||||
s32 RemapProbability(s32 new_prob, s32 old_prob);
|
||||
|
||||
/// Recenters probability. Based on section 6.3.6 of VP9 Specification
|
||||
s32 RecenterNonNeg(s32 new_prob, s32 old_prob);
|
||||
|
||||
/// Inverse of 6.3.4 Decode term subexp
|
||||
void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value);
|
||||
|
||||
/// Writes if the value is less than the test value
|
||||
bool WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test);
|
||||
|
||||
/// Writes probability updates for the Coef probabilities
|
||||
void WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode,
|
||||
const std::array<u8, 2304>& new_prob,
|
||||
const std::array<u8, 2304>& old_prob);
|
||||
|
||||
/// Write probabilities for 4-byte aligned structures
|
||||
template <typename T, std::size_t N>
|
||||
void WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
|
||||
const std::array<T, N>& old_prob);
|
||||
|
||||
/// Write motion vector probability updates. 6.3.17 in the spec
|
||||
void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
|
||||
|
||||
/// 6.2.14 Tile size calculation
|
||||
s32 CalcMinLog2TileCols(s32 frame_width);
|
||||
s32 CalcMaxLog2TileCols(s32 frame_width);
|
||||
|
||||
/// Returns VP9 information from NVDEC provided offset and size
|
||||
Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state);
|
||||
|
||||
/// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
|
||||
void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
|
||||
|
||||
/// Returns frame to be decoded after buffering
|
||||
Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state);
|
||||
|
||||
/// Use NVDEC providied information to compose the headers for the current frame
|
||||
std::vector<u8> ComposeCompressedHeader();
|
||||
VpxBitStreamWriter ComposeUncompressedHeader();
|
||||
|
||||
GPU& gpu;
|
||||
std::vector<u8> frame;
|
||||
|
||||
std::array<s8, 4> loop_filter_ref_deltas{};
|
||||
std::array<s8, 2> loop_filter_mode_deltas{};
|
||||
|
||||
bool hidden;
|
||||
s64 current_frame_number = -2; // since we buffer 2 frames
|
||||
s32 grace_period = 6; // frame offsets need to stabilize
|
||||
std::array<FrameContexts, 4> frame_ctxs{};
|
||||
Vp9FrameContainer next_frame{};
|
||||
Vp9FrameContainer next_next_frame{};
|
||||
bool swap_next_golden{};
|
||||
|
||||
Vp9PictureInfo current_frame_info{};
|
||||
Vp9EntropyProbs prev_frame_probs{};
|
||||
|
||||
s32 diff_update_probability = 252;
|
||||
s32 frame_sync_code = 0x498342;
|
||||
static constexpr std::array<s32, 254> map_lut = {
|
||||
20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
|
||||
36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50,
|
||||
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66,
|
||||
67, 68, 69, 70, 71, 72, 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82,
|
||||
83, 84, 85, 5, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 6,
|
||||
98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 7, 110, 111, 112, 113,
|
||||
114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124, 125, 126, 127, 128, 129,
|
||||
130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
|
||||
10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159, 160,
|
||||
161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176,
|
||||
177, 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192,
|
||||
193, 14, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207,
|
||||
208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223,
|
||||
224, 225, 226, 227, 228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
|
||||
240, 241, 18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19,
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace Decoder
|
||||
} // namespace Tegra
|
369
src/video_core/command_classes/codecs/vp9_types.h
Normal file
369
src/video_core/command_classes/codecs/vp9_types.h
Normal file
@ -0,0 +1,369 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <list>
|
||||
#include <vector>
|
||||
#include "common/cityhash.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/command_classes/nvdec_common.h"
|
||||
|
||||
namespace Tegra {
|
||||
class GPU;
|
||||
|
||||
namespace Decoder {
|
||||
struct Vp9FrameDimensions {
|
||||
s16 width{};
|
||||
s16 height{};
|
||||
s16 luma_pitch{};
|
||||
s16 chroma_pitch{};
|
||||
};
|
||||
static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
|
||||
|
||||
enum FrameFlags : u32 {
|
||||
IsKeyFrame = 1 << 0,
|
||||
LastFrameIsKeyFrame = 1 << 1,
|
||||
FrameSizeChanged = 1 << 2,
|
||||
ErrorResilientMode = 1 << 3,
|
||||
LastShowFrame = 1 << 4,
|
||||
IntraOnly = 1 << 5,
|
||||
};
|
||||
|
||||
enum class MvJointType {
|
||||
MvJointZero = 0, /* Zero vector */
|
||||
MvJointHnzvz = 1, /* Vert zero, hor nonzero */
|
||||
MvJointHzvnz = 2, /* Hor zero, vert nonzero */
|
||||
MvJointHnzvnz = 3, /* Both components nonzero */
|
||||
};
|
||||
enum class MvClassType {
|
||||
MvClass0 = 0, /* (0, 2] integer pel */
|
||||
MvClass1 = 1, /* (2, 4] integer pel */
|
||||
MvClass2 = 2, /* (4, 8] integer pel */
|
||||
MvClass3 = 3, /* (8, 16] integer pel */
|
||||
MvClass4 = 4, /* (16, 32] integer pel */
|
||||
MvClass5 = 5, /* (32, 64] integer pel */
|
||||
MvClass6 = 6, /* (64, 128] integer pel */
|
||||
MvClass7 = 7, /* (128, 256] integer pel */
|
||||
MvClass8 = 8, /* (256, 512] integer pel */
|
||||
MvClass9 = 9, /* (512, 1024] integer pel */
|
||||
MvClass10 = 10, /* (1024,2048] integer pel */
|
||||
};
|
||||
|
||||
enum class BlockSize {
|
||||
Block4x4 = 0,
|
||||
Block4x8 = 1,
|
||||
Block8x4 = 2,
|
||||
Block8x8 = 3,
|
||||
Block8x16 = 4,
|
||||
Block16x8 = 5,
|
||||
Block16x16 = 6,
|
||||
Block16x32 = 7,
|
||||
Block32x16 = 8,
|
||||
Block32x32 = 9,
|
||||
Block32x64 = 10,
|
||||
Block64x32 = 11,
|
||||
Block64x64 = 12,
|
||||
BlockSizes = 13,
|
||||
BlockInvalid = BlockSizes
|
||||
};
|
||||
|
||||
enum class PredictionMode {
|
||||
DcPred = 0, // Average of above and left pixels
|
||||
VPred = 1, // Vertical
|
||||
HPred = 2, // Horizontal
|
||||
D45Pred = 3, // Directional 45 deg = round(arctan(1 / 1) * 180 / pi)
|
||||
D135Pred = 4, // Directional 135 deg = 180 - 45
|
||||
D117Pred = 5, // Directional 117 deg = 180 - 63
|
||||
D153Pred = 6, // Directional 153 deg = 180 - 27
|
||||
D207Pred = 7, // Directional 207 deg = 180 + 27
|
||||
D63Pred = 8, // Directional 63 deg = round(arctan(2 / 1) * 180 / pi)
|
||||
TmPred = 9, // True-motion
|
||||
NearestMv = 10,
|
||||
NearMv = 11,
|
||||
ZeroMv = 12,
|
||||
NewMv = 13,
|
||||
MbModeCount = 14
|
||||
};
|
||||
|
||||
enum class TxSize {
|
||||
Tx4x4 = 0, // 4x4 transform
|
||||
Tx8x8 = 1, // 8x8 transform
|
||||
Tx16x16 = 2, // 16x16 transform
|
||||
Tx32x32 = 3, // 32x32 transform
|
||||
TxSizes = 4
|
||||
};
|
||||
|
||||
enum class TxMode {
|
||||
Only4X4 = 0, // Only 4x4 transform used
|
||||
Allow8X8 = 1, // Allow block transform size up to 8x8
|
||||
Allow16X16 = 2, // Allow block transform size up to 16x16
|
||||
Allow32X32 = 3, // Allow block transform size up to 32x32
|
||||
TxModeSelect = 4, // Transform specified for each block
|
||||
TxModes = 5
|
||||
};
|
||||
|
||||
enum class reference_mode {
|
||||
SingleReference = 0,
|
||||
CompoundReference = 1,
|
||||
ReferenceModeSelect = 2,
|
||||
ReferenceModes = 3
|
||||
};
|
||||
|
||||
struct Segmentation {
|
||||
u8 enabled{};
|
||||
u8 update_map{};
|
||||
u8 temporal_update{};
|
||||
u8 abs_delta{};
|
||||
std::array<u32, 8> feature_mask{};
|
||||
std::array<std::array<s16, 4>, 8> feature_data{};
|
||||
};
|
||||
static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
|
||||
|
||||
struct LoopFilter {
|
||||
u8 mode_ref_delta_enabled{};
|
||||
std::array<s8, 4> ref_deltas{};
|
||||
std::array<s8, 2> mode_deltas{};
|
||||
};
|
||||
static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");
|
||||
|
||||
struct Vp9EntropyProbs {
|
||||
std::array<u8, 36> y_mode_prob{};
|
||||
std::array<u8, 64> partition_prob{};
|
||||
std::array<u8, 2304> coef_probs{};
|
||||
std::array<u8, 8> switchable_interp_prob{};
|
||||
std::array<u8, 28> inter_mode_prob{};
|
||||
std::array<u8, 4> intra_inter_prob{};
|
||||
std::array<u8, 5> comp_inter_prob{};
|
||||
std::array<u8, 10> single_ref_prob{};
|
||||
std::array<u8, 5> comp_ref_prob{};
|
||||
std::array<u8, 6> tx_32x32_prob{};
|
||||
std::array<u8, 4> tx_16x16_prob{};
|
||||
std::array<u8, 2> tx_8x8_prob{};
|
||||
std::array<u8, 3> skip_probs{};
|
||||
std::array<u8, 3> joints{};
|
||||
std::array<u8, 2> sign{};
|
||||
std::array<u8, 20> classes{};
|
||||
std::array<u8, 2> class_0{};
|
||||
std::array<u8, 20> prob_bits{};
|
||||
std::array<u8, 12> class_0_fr{};
|
||||
std::array<u8, 6> fr{};
|
||||
std::array<u8, 2> class_0_hp{};
|
||||
std::array<u8, 2> high_precision{};
|
||||
};
|
||||
static_assert(sizeof(Vp9EntropyProbs) == 0x9F4, "Vp9EntropyProbs is an invalid size");
|
||||
|
||||
struct Vp9PictureInfo {
|
||||
bool is_key_frame{};
|
||||
bool intra_only{};
|
||||
bool last_frame_was_key{};
|
||||
bool frame_size_changed{};
|
||||
bool error_resilient_mode{};
|
||||
bool last_frame_shown{};
|
||||
bool show_frame{};
|
||||
std::array<s8, 4> ref_frame_sign_bias{};
|
||||
s32 base_q_index{};
|
||||
s32 y_dc_delta_q{};
|
||||
s32 uv_dc_delta_q{};
|
||||
s32 uv_ac_delta_q{};
|
||||
bool lossless{};
|
||||
s32 transform_mode{};
|
||||
bool allow_high_precision_mv{};
|
||||
s32 interp_filter{};
|
||||
s32 reference_mode{};
|
||||
s8 comp_fixed_ref{};
|
||||
std::array<s8, 2> comp_var_ref{};
|
||||
s32 log2_tile_cols{};
|
||||
s32 log2_tile_rows{};
|
||||
bool segment_enabled{};
|
||||
bool segment_map_update{};
|
||||
bool segment_map_temporal_update{};
|
||||
s32 segment_abs_delta{};
|
||||
std::array<u32, 8> segment_feature_enable{};
|
||||
std::array<std::array<s16, 4>, 8> segment_feature_data{};
|
||||
bool mode_ref_delta_enabled{};
|
||||
bool use_prev_in_find_mv_refs{};
|
||||
std::array<s8, 4> ref_deltas{};
|
||||
std::array<s8, 2> mode_deltas{};
|
||||
Vp9EntropyProbs entropy{};
|
||||
Vp9FrameDimensions frame_size{};
|
||||
u8 first_level{};
|
||||
u8 sharpness_level{};
|
||||
u32 bitstream_size{};
|
||||
std::array<u64, 4> frame_offsets{};
|
||||
std::array<bool, 4> refresh_frame{};
|
||||
};
|
||||
|
||||
struct Vp9FrameContainer {
|
||||
Vp9PictureInfo info{};
|
||||
std::vector<u8> bit_stream;
|
||||
};
|
||||
|
||||
struct PictureInfo {
|
||||
INSERT_PADDING_WORDS(12);
|
||||
u32 bitstream_size{};
|
||||
INSERT_PADDING_WORDS(5);
|
||||
Vp9FrameDimensions last_frame_size{};
|
||||
Vp9FrameDimensions golden_frame_size{};
|
||||
Vp9FrameDimensions alt_frame_size{};
|
||||
Vp9FrameDimensions current_frame_size{};
|
||||
u32 vp9_flags{};
|
||||
std::array<s8, 4> ref_frame_sign_bias{};
|
||||
u8 first_level{};
|
||||
u8 sharpness_level{};
|
||||
u8 base_q_index{};
|
||||
u8 y_dc_delta_q{};
|
||||
u8 uv_ac_delta_q{};
|
||||
u8 uv_dc_delta_q{};
|
||||
u8 lossless{};
|
||||
u8 tx_mode{};
|
||||
u8 allow_high_precision_mv{};
|
||||
u8 interp_filter{};
|
||||
u8 reference_mode{};
|
||||
s8 comp_fixed_ref{};
|
||||
std::array<s8, 2> comp_var_ref{};
|
||||
u8 log2_tile_cols{};
|
||||
u8 log2_tile_rows{};
|
||||
Segmentation segmentation{};
|
||||
LoopFilter loop_filter{};
|
||||
INSERT_PADDING_BYTES(5);
|
||||
u32 surface_params{};
|
||||
INSERT_PADDING_WORDS(3);
|
||||
|
||||
Vp9PictureInfo Convert() const {
|
||||
|
||||
return Vp9PictureInfo{
|
||||
.is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0,
|
||||
.intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0,
|
||||
.last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0,
|
||||
.frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
|
||||
.error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
|
||||
.last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
|
||||
.ref_frame_sign_bias = ref_frame_sign_bias,
|
||||
.base_q_index = base_q_index,
|
||||
.y_dc_delta_q = y_dc_delta_q,
|
||||
.uv_dc_delta_q = uv_dc_delta_q,
|
||||
.uv_ac_delta_q = uv_ac_delta_q,
|
||||
.lossless = lossless != 0,
|
||||
.transform_mode = tx_mode,
|
||||
.allow_high_precision_mv = allow_high_precision_mv != 0,
|
||||
.interp_filter = interp_filter,
|
||||
.reference_mode = reference_mode,
|
||||
.comp_fixed_ref = comp_fixed_ref,
|
||||
.comp_var_ref = comp_var_ref,
|
||||
.log2_tile_cols = log2_tile_cols,
|
||||
.log2_tile_rows = log2_tile_rows,
|
||||
.segment_enabled = segmentation.enabled != 0,
|
||||
.segment_map_update = segmentation.update_map != 0,
|
||||
.segment_map_temporal_update = segmentation.temporal_update != 0,
|
||||
.segment_abs_delta = segmentation.abs_delta,
|
||||
.segment_feature_enable = segmentation.feature_mask,
|
||||
.segment_feature_data = segmentation.feature_data,
|
||||
.mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,
|
||||
.use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) &&
|
||||
!(vp9_flags == (FrameFlags::FrameSizeChanged)) &&
|
||||
!(vp9_flags == (FrameFlags::IntraOnly)) &&
|
||||
(vp9_flags == (FrameFlags::LastShowFrame)) &&
|
||||
!(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),
|
||||
.ref_deltas = loop_filter.ref_deltas,
|
||||
.mode_deltas = loop_filter.mode_deltas,
|
||||
.frame_size = current_frame_size,
|
||||
.first_level = first_level,
|
||||
.sharpness_level = sharpness_level,
|
||||
.bitstream_size = bitstream_size,
|
||||
};
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
|
||||
|
||||
struct EntropyProbs {
|
||||
INSERT_PADDING_BYTES(1024);
|
||||
std::array<std::array<u8, 4>, 7> inter_mode_prob{};
|
||||
std::array<u8, 4> intra_inter_prob{};
|
||||
INSERT_PADDING_BYTES(80);
|
||||
std::array<std::array<u8, 1>, 2> tx_8x8_prob{};
|
||||
std::array<std::array<u8, 2>, 2> tx_16x16_prob{};
|
||||
std::array<std::array<u8, 3>, 2> tx_32x32_prob{};
|
||||
std::array<u8, 4> y_mode_prob_e8{};
|
||||
std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{};
|
||||
INSERT_PADDING_BYTES(64);
|
||||
std::array<std::array<u8, 4>, 16> partition_prob{};
|
||||
INSERT_PADDING_BYTES(10);
|
||||
std::array<std::array<u8, 2>, 4> switchable_interp_prob{};
|
||||
std::array<u8, 5> comp_inter_prob{};
|
||||
std::array<u8, 4> skip_probs{};
|
||||
std::array<u8, 3> joints{};
|
||||
std::array<u8, 2> sign{};
|
||||
std::array<std::array<u8, 1>, 2> class_0{};
|
||||
std::array<std::array<u8, 3>, 2> fr{};
|
||||
std::array<u8, 2> class_0_hp{};
|
||||
std::array<u8, 2> high_precision{};
|
||||
std::array<std::array<u8, 10>, 2> classes{};
|
||||
std::array<std::array<std::array<u8, 3>, 2>, 2> class_0_fr{};
|
||||
std::array<std::array<u8, 10>, 2> pred_bits{};
|
||||
std::array<std::array<u8, 2>, 5> single_ref_prob{};
|
||||
std::array<u8, 5> comp_ref_prob{};
|
||||
INSERT_PADDING_BYTES(17);
|
||||
std::array<std::array<std::array<std::array<std::array<std::array<u8, 4>, 6>, 6>, 2>, 2>, 4>
|
||||
coef_probs{};
|
||||
|
||||
void Convert(Vp9EntropyProbs& fc) {
|
||||
std::memcpy(fc.inter_mode_prob.data(), inter_mode_prob.data(), fc.inter_mode_prob.size());
|
||||
|
||||
std::memcpy(fc.intra_inter_prob.data(), intra_inter_prob.data(),
|
||||
fc.intra_inter_prob.size());
|
||||
|
||||
std::memcpy(fc.tx_8x8_prob.data(), tx_8x8_prob.data(), fc.tx_8x8_prob.size());
|
||||
std::memcpy(fc.tx_16x16_prob.data(), tx_16x16_prob.data(), fc.tx_16x16_prob.size());
|
||||
std::memcpy(fc.tx_32x32_prob.data(), tx_32x32_prob.data(), fc.tx_32x32_prob.size());
|
||||
|
||||
for (s32 i = 0; i < 4; i++) {
|
||||
for (s32 j = 0; j < 9; j++) {
|
||||
fc.y_mode_prob[j + 9 * i] = j < 8 ? y_mode_prob_e0e7[i][j] : y_mode_prob_e8[i];
|
||||
}
|
||||
}
|
||||
|
||||
std::memcpy(fc.partition_prob.data(), partition_prob.data(), fc.partition_prob.size());
|
||||
|
||||
std::memcpy(fc.switchable_interp_prob.data(), switchable_interp_prob.data(),
|
||||
fc.switchable_interp_prob.size());
|
||||
std::memcpy(fc.comp_inter_prob.data(), comp_inter_prob.data(), fc.comp_inter_prob.size());
|
||||
std::memcpy(fc.skip_probs.data(), skip_probs.data(), fc.skip_probs.size());
|
||||
|
||||
std::memcpy(fc.joints.data(), joints.data(), fc.joints.size());
|
||||
|
||||
std::memcpy(fc.sign.data(), sign.data(), fc.sign.size());
|
||||
std::memcpy(fc.class_0.data(), class_0.data(), fc.class_0.size());
|
||||
std::memcpy(fc.fr.data(), fr.data(), fc.fr.size());
|
||||
std::memcpy(fc.class_0_hp.data(), class_0_hp.data(), fc.class_0_hp.size());
|
||||
std::memcpy(fc.high_precision.data(), high_precision.data(), fc.high_precision.size());
|
||||
std::memcpy(fc.classes.data(), classes.data(), fc.classes.size());
|
||||
std::memcpy(fc.class_0_fr.data(), class_0_fr.data(), fc.class_0_fr.size());
|
||||
std::memcpy(fc.prob_bits.data(), pred_bits.data(), fc.prob_bits.size());
|
||||
std::memcpy(fc.single_ref_prob.data(), single_ref_prob.data(), fc.single_ref_prob.size());
|
||||
std::memcpy(fc.comp_ref_prob.data(), comp_ref_prob.data(), fc.comp_ref_prob.size());
|
||||
|
||||
std::memcpy(fc.coef_probs.data(), coef_probs.data(), fc.coef_probs.size());
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(EntropyProbs) == 0xEA0, "EntropyProbs is an invalid size");
|
||||
|
||||
enum class Ref { Last, Golden, AltRef };
|
||||
|
||||
struct RefPoolElement {
|
||||
s64 frame{};
|
||||
Ref ref{};
|
||||
bool refresh{};
|
||||
};
|
||||
|
||||
struct FrameContexts {
|
||||
s64 from{};
|
||||
bool adapted{};
|
||||
Vp9EntropyProbs probs{};
|
||||
};
|
||||
|
||||
}; // namespace Decoder
|
||||
}; // namespace Tegra
|
Reference in New Issue
Block a user