video_core: Rewrite the texture cache

The current texture cache has several points that hurt maintainability
and performance. It's easy to break unrelated parts of the cache
when doing minor changes. The cache can easily forget valuable
information about the cached textures by CPU writes or simply by its
normal usage.The current texture cache has several points that hurt
maintainability and performance. It's easy to break unrelated parts
of the cache when doing minor changes. The cache can easily forget
valuable information about the cached textures by CPU writes or simply
by its normal usage.

This commit aims to address those issues.
This commit is contained in:
ReinUsesLisp
2020-12-30 02:25:23 -03:00
parent 9106ac1e6b
commit 9764c13d6d
152 changed files with 10609 additions and 8351 deletions

View File

@ -10,7 +10,11 @@
namespace Tegra::Engines {
Fermi2D::Fermi2D() = default;
Fermi2D::Fermi2D() {
// Nvidia's OpenGL driver seems to assume these values
regs.src.depth = 1;
regs.dst.depth = 1;
}
Fermi2D::~Fermi2D() = default;
@ -21,78 +25,43 @@ void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Fermi2D register, increase the size of the Regs structure");
regs.reg_array[method] = method_argument;
switch (method) {
// Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
// so trigger on the second 32-bit write.
case FERMI2D_REG_INDEX(blit_src_y) + 1: {
HandleSurfaceCopy();
break;
}
if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) {
Blit();
}
}
void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
for (std::size_t i = 0; i < amount; i++) {
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
for (u32 i = 0; i < amount; ++i) {
CallMethod(method, base_start[i], methods_pending - i <= 1);
}
}
static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
const u32 line_a = src_2 - src_1;
const u32 line_b = dst_2 - dst_1;
const u32 excess = std::max<s32>(0, line_a - src_line + src_1);
return {line_b - (excess * line_b) / line_a, excess};
}
void Fermi2D::Blit() {
LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
regs.src.Address(), regs.dst.Address());
void Fermi2D::HandleSurfaceCopy() {
LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", regs.operation);
UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy");
UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero");
UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero");
UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one");
UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
// TODO(Subv): Only raw copies are implemented.
ASSERT(regs.operation == Operation::SrcCopy);
const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
u32 src_blit_x2, src_blit_y2;
if (regs.blit_control.origin == Origin::Corner) {
src_blit_x2 =
static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32);
src_blit_y2 =
static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32);
} else {
src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
}
u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width;
u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height;
const auto [new_dst_w, src_excess_x] =
DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width);
const auto [new_dst_h, src_excess_y] =
DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height);
dst_blit_x2 = new_dst_w + regs.blit_dst_x;
src_blit_x2 = src_blit_x2 - src_excess_x;
dst_blit_y2 = new_dst_h + regs.blit_dst_y;
src_blit_y2 = src_blit_y2 - src_excess_y;
const auto [new_src_w, dst_excess_x] =
DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width);
const auto [new_src_h, dst_excess_y] =
DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height);
src_blit_x2 = new_src_w + src_blit_x1;
dst_blit_x2 = dst_blit_x2 - dst_excess_x;
src_blit_y2 = new_src_h + src_blit_y1;
dst_blit_y2 = dst_blit_y2 - dst_excess_y;
const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
dst_blit_y2};
const Config copy_config{
const auto& args = regs.pixels_from_memory;
const Config config{
.operation = regs.operation,
.filter = regs.blit_control.filter,
.src_rect = src_rect,
.dst_rect = dst_rect,
.filter = args.sample_mode.filter,
.dst_x0 = args.dst_x0,
.dst_y0 = args.dst_y0,
.dst_x1 = args.dst_x0 + args.dst_width,
.dst_y1 = args.dst_y0 + args.dst_height,
.src_x0 = static_cast<s32>(args.src_x0 >> 32),
.src_y0 = static_cast<s32>(args.src_y0 >> 32),
.src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
.src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
};
if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) {
UNIMPLEMENTED();
}
}