mirror of
https://github.com/yuzu-emu/yuzu-android.git
synced 2025-06-12 22:38:10 -05:00
video_core: Rewrite the texture cache
The current texture cache has several points that hurt maintainability and performance. It's easy to break unrelated parts of the cache when doing minor changes. The cache can easily forget valuable information about the cached textures by CPU writes or simply by its normal usage.The current texture cache has several points that hurt maintainability and performance. It's easy to break unrelated parts of the cache when doing minor changes. The cache can easily forget valuable information about the cached textures by CPU writes or simply by its normal usage. This commit aims to address those issues.
This commit is contained in:
@ -10,7 +10,11 @@
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
Fermi2D::Fermi2D() = default;
|
||||
Fermi2D::Fermi2D() {
|
||||
// Nvidia's OpenGL driver seems to assume these values
|
||||
regs.src.depth = 1;
|
||||
regs.dst.depth = 1;
|
||||
}
|
||||
|
||||
Fermi2D::~Fermi2D() = default;
|
||||
|
||||
@ -21,78 +25,43 @@ void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
|
||||
void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS,
|
||||
"Invalid Fermi2D register, increase the size of the Regs structure");
|
||||
|
||||
regs.reg_array[method] = method_argument;
|
||||
|
||||
switch (method) {
|
||||
// Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
|
||||
// so trigger on the second 32-bit write.
|
||||
case FERMI2D_REG_INDEX(blit_src_y) + 1: {
|
||||
HandleSurfaceCopy();
|
||||
break;
|
||||
}
|
||||
if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) {
|
||||
Blit();
|
||||
}
|
||||
}
|
||||
|
||||
void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
|
||||
for (std::size_t i = 0; i < amount; i++) {
|
||||
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
|
||||
for (u32 i = 0; i < amount; ++i) {
|
||||
CallMethod(method, base_start[i], methods_pending - i <= 1);
|
||||
}
|
||||
}
|
||||
|
||||
static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
|
||||
const u32 line_a = src_2 - src_1;
|
||||
const u32 line_b = dst_2 - dst_1;
|
||||
const u32 excess = std::max<s32>(0, line_a - src_line + src_1);
|
||||
return {line_b - (excess * line_b) / line_a, excess};
|
||||
}
|
||||
void Fermi2D::Blit() {
|
||||
LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
|
||||
regs.src.Address(), regs.dst.Address());
|
||||
|
||||
void Fermi2D::HandleSurfaceCopy() {
|
||||
LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", regs.operation);
|
||||
UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy");
|
||||
UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero");
|
||||
UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero");
|
||||
UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one");
|
||||
UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
|
||||
|
||||
// TODO(Subv): Only raw copies are implemented.
|
||||
ASSERT(regs.operation == Operation::SrcCopy);
|
||||
|
||||
const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
|
||||
const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
|
||||
u32 src_blit_x2, src_blit_y2;
|
||||
if (regs.blit_control.origin == Origin::Corner) {
|
||||
src_blit_x2 =
|
||||
static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32);
|
||||
src_blit_y2 =
|
||||
static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32);
|
||||
} else {
|
||||
src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
|
||||
src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
|
||||
}
|
||||
u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width;
|
||||
u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height;
|
||||
const auto [new_dst_w, src_excess_x] =
|
||||
DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width);
|
||||
const auto [new_dst_h, src_excess_y] =
|
||||
DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height);
|
||||
dst_blit_x2 = new_dst_w + regs.blit_dst_x;
|
||||
src_blit_x2 = src_blit_x2 - src_excess_x;
|
||||
dst_blit_y2 = new_dst_h + regs.blit_dst_y;
|
||||
src_blit_y2 = src_blit_y2 - src_excess_y;
|
||||
const auto [new_src_w, dst_excess_x] =
|
||||
DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width);
|
||||
const auto [new_src_h, dst_excess_y] =
|
||||
DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height);
|
||||
src_blit_x2 = new_src_w + src_blit_x1;
|
||||
dst_blit_x2 = dst_blit_x2 - dst_excess_x;
|
||||
src_blit_y2 = new_src_h + src_blit_y1;
|
||||
dst_blit_y2 = dst_blit_y2 - dst_excess_y;
|
||||
const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
|
||||
const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
|
||||
dst_blit_y2};
|
||||
const Config copy_config{
|
||||
const auto& args = regs.pixels_from_memory;
|
||||
const Config config{
|
||||
.operation = regs.operation,
|
||||
.filter = regs.blit_control.filter,
|
||||
.src_rect = src_rect,
|
||||
.dst_rect = dst_rect,
|
||||
.filter = args.sample_mode.filter,
|
||||
.dst_x0 = args.dst_x0,
|
||||
.dst_y0 = args.dst_y0,
|
||||
.dst_x1 = args.dst_x0 + args.dst_width,
|
||||
.dst_y1 = args.dst_y0 + args.dst_height,
|
||||
.src_x0 = static_cast<s32>(args.src_x0 >> 32),
|
||||
.src_y0 = static_cast<s32>(args.src_y0 >> 32),
|
||||
.src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
|
||||
.src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
|
||||
};
|
||||
if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
|
||||
if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user