Mirror of https://github.com/yuzu-emu/yuzu.git, synced 2025-06-18 16:47:56 -05:00.
Commit: Merge branch 'master' into ssbo-align.
This commit is contained in:
@ -428,7 +428,7 @@ void FoldFPAdd32(IR::Inst& inst) {
|
||||
}
|
||||
}
|
||||
|
||||
bool FoldDerivateYFromCorrection(IR::Inst& inst) {
|
||||
bool FoldDerivativeYFromCorrection(IR::Inst& inst) {
|
||||
const IR::Value lhs_value{inst.Arg(0)};
|
||||
const IR::Value rhs_value{inst.Arg(1)};
|
||||
IR::Inst* const lhs_op{lhs_value.InstRecursive()};
|
||||
@ -464,7 +464,7 @@ void FoldFPMul32(IR::Inst& inst) {
|
||||
if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
|
||||
return;
|
||||
}
|
||||
if (FoldDerivateYFromCorrection(inst)) {
|
||||
if (FoldDerivativeYFromCorrection(inst)) {
|
||||
return;
|
||||
}
|
||||
IR::Inst* const lhs_op{lhs_value.InstRecursive()};
|
||||
@ -699,7 +699,7 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
|
||||
}
|
||||
}
|
||||
|
||||
bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) {
|
||||
bool FindGradient3DDerivatives(std::array<IR::Value, 3>& results, IR::Value coord) {
|
||||
if (coord.IsImmediate()) {
|
||||
return false;
|
||||
}
|
||||
@ -834,7 +834,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
|
||||
IR::Inst* const inst2 = coords.InstRecursive();
|
||||
std::array<std::array<IR::Value, 3>, 3> results_matrix;
|
||||
for (size_t i = 0; i < 3; i++) {
|
||||
if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) {
|
||||
if (!FindGradient3DDerivatives(results_matrix[i], inst2->Arg(i).Resolve())) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -852,7 +852,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
|
||||
IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
|
||||
results_matrix[1][1], results_matrix[1][2]);
|
||||
IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]);
|
||||
info.num_derivates.Assign(3);
|
||||
info.num_derivatives.Assign(3);
|
||||
IR::Value new_gradient_instruction =
|
||||
ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info);
|
||||
IR::Inst* const new_inst = new_gradient_instruction.InstRecursive();
|
||||
|
@ -26,6 +26,7 @@ void SsaRewritePass(IR::Program& program);
|
||||
void PositionPass(Environment& env, IR::Program& program);
|
||||
void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info);
|
||||
void LayerPass(IR::Program& program, const HostTranslateInfo& host_info);
|
||||
void VendorWorkaroundPass(IR::Program& program);
|
||||
void VerificationPass(const IR::Program& program);
|
||||
|
||||
// Dual Vertex
|
||||
|
79
src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
Normal file
79
src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
Normal file
@ -0,0 +1,79 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/ir/basic_block.h"
|
||||
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/frontend/ir/value.h"
|
||||
#include "shader_recompiler/ir_opt/passes.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
namespace {
|
||||
void AddingByteSwapsWorkaround(IR::Block& block, IR::Inst& inst) {
    // Workaround for an NVIDIA driver bug observed in Super Mario RPG.
    //
    // The pattern being matched is:
    //   %lhs_bfe = BitFieldUExtract %factor_a, #0, #16
    //   %lhs_mul = IMul32 %lhs_bfe, %factor_b // potentially optional?
    //   %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
    //   %rhs_bfe = BitFieldUExtract %factor_a, #16, #16
    //   %result = IAdd32 %lhs_shl, %rhs_bfe
    //
    // When found, the IAdd32 is replaced with an equivalent BitwiseOr32:
    //   %result = BitwiseOr32 %lhs_shl, %rhs_bfe
    IR::Inst* const shift_op{inst.Arg(0).TryInstRecursive()};
    IR::Inst* const high_extract_op{inst.Arg(1).TryInstRecursive()};
    if (!shift_op || !high_extract_op) {
        // One of the operands is an immediate; nothing to match.
        return;
    }
    // Left operand must shift its input left by exactly 16 bits.
    if (shift_op->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
        shift_op->Arg(1) != IR::Value{16U}) {
        return;
    }
    // Right operand must extract the upper 16 bits (offset 16, count 16).
    if (high_extract_op->GetOpcode() != IR::Opcode::BitFieldUExtract ||
        high_extract_op->Arg(1) != IR::Value{16U} || high_extract_op->Arg(2) != IR::Value{16U}) {
        return;
    }
    // Below the shift there may be a multiply, or the low-bits extract directly.
    IR::Inst* const mul_op{shift_op->Arg(0).TryInstRecursive()};
    if (!mul_op) {
        return;
    }
    const bool mul_is_absent{mul_op->GetOpcode() == IR::Opcode::BitFieldUExtract};
    if (!mul_is_absent && mul_op->GetOpcode() != IR::Opcode::IMul32) {
        return;
    }
    // Resolve the low-bits extract: either the instruction we already have, or
    // the multiply's first operand.
    IR::Inst* const low_extract_op{mul_is_absent ? mul_op : mul_op->Arg(0).TryInstRecursive()};
    if (!low_extract_op) {
        return;
    }
    // It must extract the lower 16 bits (offset 0, count 16).
    if (low_extract_op->GetOpcode() != IR::Opcode::BitFieldUExtract) {
        return;
    }
    if (low_extract_op->Arg(1) != IR::Value{0U} || low_extract_op->Arg(2) != IR::Value{16U}) {
        return;
    }
    // Pattern matched: emit a BitwiseOr32 over the same operands and redirect
    // every use of the IAdd32 to it.
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    inst.ReplaceUsesWith(ir.BitwiseOr(IR::U32{inst.Arg(0)}, IR::U32{inst.Arg(1)}));
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
void VendorWorkaroundPass(IR::Program& program) {
    // Walk every instruction of every block and apply workarounds for known
    // vendor driver bugs. Only IAdd32 instructions are inspected at present.
    for (IR::Block* const block : program.post_order_blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (inst.GetOpcode() == IR::Opcode::IAdd32) {
                AddingByteSwapsWorkaround(*block, inst);
            }
        }
    }
}
|
||||
|
||||
} // namespace Shader::Optimization
|
Reference in New Issue
Block a user