Merge pull request #2976 from FernandoS27/cache-fast-brx-rebased

Implement Fast BRX, fix TXQ and adapt the Shader Cache for it
This commit is contained in:
Rodrigo Locatti
2019-10-26 16:56:13 -03:00
committed by GitHub
29 changed files with 1492 additions and 872 deletions

View File

@ -228,6 +228,10 @@ public:
inner += expr.value ? "true" : "false";
}
/// Appends a register-equals-constant test to the output, rendered as
/// "( gpr_<register> == <value>)".
void operator()(const ExprGprEqual& expr) {
    inner += "( gpr_";
    inner += std::to_string(expr.gpr);
    inner += " == ";
    inner += std::to_string(expr.value);
    inner += ')';
}
/// Returns a reference to the text accumulated in `inner` by the visitor overloads.
const std::string& GetResult() const {
return inner;
}

View File

@ -0,0 +1,110 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <memory>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/shader/const_buffer_locker.h"
namespace VideoCommon::Shader {
using Tegra::Engines::SamplerDescriptor;
/// Creates a locker bound to a shader stage only. No engine is passed here, so this
/// constructor does not set the `engine` member.
ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
: stage{shader_stage} {}
/// Creates a locker bound to a shader stage and to an engine. Only the address of `engine`
/// is stored; the locker does not take ownership of it.
ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
Tegra::Engines::ConstBufferEngineInterface& engine)
: stage{shader_stage}, engine{&engine} {}
/// Defaulted destructor, defined out of line.
ConstBufferLocker::~ConstBufferLocker() = default;
/// Returns the 32-bit value registered for (buffer, offset). When nothing is registered the
/// value is read from the engine and stored; without an engine the result is std::nullopt.
std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
    const std::pair<u32, u32> location{buffer, offset};
    if (const auto cached = keys.find(location); cached != keys.end()) {
        return cached->second;
    }
    if (engine == nullptr) {
        return std::nullopt;
    }
    // The lookup above missed, so this emplace always inserts the freshly read value.
    return keys.emplace(location, engine->AccessConstBuffer32(stage, buffer, offset))
        .first->second;
}
/// Returns the bound sampler descriptor registered for `offset`. When nothing is registered
/// the descriptor is read from the engine and stored; without an engine the result is
/// std::nullopt.
std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
    if (const auto cached = bound_samplers.find(offset); cached != bound_samplers.end()) {
        return cached->second;
    }
    if (engine == nullptr) {
        return std::nullopt;
    }
    // The lookup above missed, so this emplace always inserts the freshly read descriptor.
    return bound_samplers.emplace(offset, engine->AccessBoundSampler(stage, offset))
        .first->second;
}
/// Returns the bindless sampler descriptor registered for (buffer, offset). When nothing is
/// registered the descriptor is read from the engine and stored; without an engine the result
/// is std::nullopt.
std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
    u32 buffer, u32 offset) {
    const std::pair<u32, u32> location{buffer, offset};
    if (const auto cached = bindless_samplers.find(location);
        cached != bindless_samplers.end()) {
        return cached->second;
    }
    if (engine == nullptr) {
        return std::nullopt;
    }
    // The lookup above missed, so this emplace always inserts the freshly read descriptor.
    return bindless_samplers
        .emplace(location, engine->AccessBindlessSampler(stage, buffer, offset))
        .first->second;
}
/// Registers `value` for (buffer, offset), overwriting any value already stored there.
void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
    const std::pair<u32, u32> location{buffer, offset};
    keys.insert_or_assign(location, value);
}
/// Registers `sampler` as the bound sampler at `offset`, overwriting any descriptor already
/// stored there.
void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
    const u32 key = offset;
    bound_samplers.insert_or_assign(key, sampler);
}
/// Registers `sampler` as the bindless sampler at (buffer, offset), overwriting any descriptor
/// already stored there.
void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
    const std::pair<u32, u32> location{buffer, offset};
    bindless_samplers.insert_or_assign(location, sampler);
}
/// Compares every stored key and sampler with what the engine currently reports.
/// Returns false when no engine is attached or at the first mismatch; true otherwise.
/// Entries are checked in the order: const buffer keys, bound samplers, bindless samplers.
bool ConstBufferLocker::IsConsistent() const {
    if (engine == nullptr) {
        return false;
    }
    for (const auto& entry : keys) {
        const auto [cbuf, offset] = entry.first;
        const auto stored = entry.second;
        if (!(stored == engine->AccessConstBuffer32(stage, cbuf, offset))) {
            return false;
        }
    }
    for (const auto& entry : bound_samplers) {
        const auto [offset, stored] = entry;
        if (!(stored == engine->AccessBoundSampler(stage, offset))) {
            return false;
        }
    }
    for (const auto& entry : bindless_samplers) {
        const auto [cbuf, offset] = entry.first;
        const auto stored = entry.second;
        if (!(stored == engine->AccessBindlessSampler(stage, cbuf, offset))) {
            return false;
        }
    }
    return true;
}
/// Returns true when this locker and `rhs` store the same const buffer keys, bound samplers
/// and bindless samplers. The stage and the attached engine are not compared.
bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
    if (!(keys == rhs.keys)) {
        return false;
    }
    if (!(bound_samplers == rhs.bound_samplers)) {
        return false;
    }
    return bindless_samplers == rhs.bindless_samplers;
}
} // namespace VideoCommon::Shader

View File

@ -0,0 +1,80 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <unordered_map>
#include "common/common_types.h"
#include "common/hash.h"
#include "video_core/engines/const_buffer_engine_interface.h"
namespace VideoCommon::Shader {
/// Maps a (const buffer index, offset) pair to the 32-bit value read at that location.
using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
/// Maps a bound sampler offset to its sampler descriptor.
using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
/// Maps a (const buffer index, offset) pair to the bindless sampler descriptor found there.
using BindlessSamplerMap =
std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
/**
* The ConstBufferLocker is a class used to interface the 3D and compute engines with the shader
* compiler. With it, the shader compiler can obtain the data it requires from GPU state and
* keep it stored so it can be reused for disk shader compilation.
**/
class ConstBufferLocker {
public:
/// Creates a locker for the given stage without an engine attached.
explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
/// Creates a locker for the given stage that reads unregistered entries from `engine`.
explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
Tegra::Engines::ConstBufferEngineInterface& engine);
~ConstBufferLocker();
/// Retrieves a key from the locker. If it is registered, the registered value is returned;
/// if not, it is obtained from the attached engine and registered. Returns std::nullopt when
/// the key is not registered and no engine is attached.
std::optional<u32> ObtainKey(u32 buffer, u32 offset);
/// Same lookup as ObtainKey, for the bound sampler descriptor at `offset`.
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
/// Same lookup as ObtainKey, for the bindless sampler descriptor at (buffer, offset).
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
/// Inserts a key, replacing any value already registered for (buffer, offset).
void InsertKey(u32 buffer, u32 offset, u32 value);
/// Inserts a bound sampler key, replacing any descriptor already registered for `offset`.
void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
/// Inserts a bindless sampler key, replacing any descriptor already registered for
/// (buffer, offset).
void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
/// Checks keys and samplers against the engine's current const buffers. Returns true if they
/// all hold the same values, false otherwise (including when no engine is attached).
bool IsConsistent() const;
/// Returns true if the keys and samplers are equal to the ones stored in `rhs`.
bool HasEqualKeys(const ConstBufferLocker& rhs) const;
/// Gives a getter to the const buffer keys in the database.
const KeyMap& GetKeys() const {
return keys;
}
/// Gets the bound samplers database.
const BoundSamplerMap& GetBoundSamplers() const {
return bound_samplers;
}
/// Gets the bindless samplers database.
const BindlessSamplerMap& GetBindlessSamplers() const {
return bindless_samplers;
}
private:
/// Shader stage passed to every engine access.
const Tegra::Engines::ShaderType stage;
/// Engine used to resolve unregistered entries; null when none is attached.
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
/// Registered const buffer values.
KeyMap keys;
/// Registered bound sampler descriptors.
BoundSamplerMap bound_samplers;
/// Registered bindless sampler descriptors.
BindlessSamplerMap bindless_samplers;
};
} // namespace VideoCommon::Shader

View File

@ -35,14 +35,20 @@ struct BlockStack {
std::stack<u32> pbk_stack{};
};
/// Describes the branch that terminates a block.
struct BlockBranchInfo {
/// Predicate and condition code guarding the branch.
Condition condition{};
/// Branch target; defaults to exit_branch.
s32 address{exit_branch};
/// Set by the parser for KIL.
bool kill{};
/// Set by the parser for SYNC.
bool is_sync{};
/// Set by the parser for BRK.
bool is_brk{};
/// Set when the branch should not be emitted.
bool ignore{};
};
/// Builds a shared BranchData holding a `T` constructed from the forwarded arguments.
template <typename T, typename... Args>
BlockBranchInfo MakeBranchInfo(Args&&... args) {
// Reject any T that cannot be converted to the BranchData variant.
static_assert(std::is_convertible_v<T, BranchData>);
return std::make_shared<BranchData>(T(std::forward<Args>(args)...));
}
bool BlockBranchIsIgnored(BlockBranchInfo first) {
bool ignore = false;
if (std::holds_alternative<SingleBranch>(*first)) {
const auto branch = std::get_if<SingleBranch>(first.get());
ignore = branch->ignore;
}
return ignore;
}
struct BlockInfo {
u32 start{};
@ -56,10 +62,11 @@ struct BlockInfo {
};
struct CFGRebuildState {
explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size,
const u32 start)
: start{start}, program_code{program_code}, program_size{program_size} {}
explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
: program_code{program_code}, start{start}, locker{locker} {}
const ProgramCode& program_code;
ConstBufferLocker& locker;
u32 start{};
std::vector<BlockInfo> block_info{};
std::list<u32> inspect_queries{};
@ -69,8 +76,6 @@ struct CFGRebuildState {
std::map<u32, u32> ssy_labels{};
std::map<u32, u32> pbk_labels{};
std::unordered_map<u32, BlockStack> stacks{};
const ProgramCode& program_code;
const std::size_t program_size;
ASTManager* manager;
};
@ -124,10 +129,116 @@ enum class ParseResult : u32 {
AbnormalFlow,
};
/// Result of tracking the instructions that feed a BRX instruction.
struct BranchIndirectInfo {
/// Const buffer index taken from the tracked LD_C instruction.
u32 buffer{};
/// Const buffer offset taken from the tracked LD_C instruction.
u32 offset{};
/// Immediate of the tracked IMNMX instruction plus one; the BRX parser reads this many
/// 4-byte entries starting at `offset`.
u32 entries{};
/// Branch extend value of the BRX instruction; added to the BRX position to form the base
/// of every branch target.
s32 relative_position{};
};
std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state,
u32 start_address, u32 current_position) {
const u32 shader_start = state.start;
u32 pos = current_position;
BranchIndirectInfo result{};
u64 track_register = 0;
// Step 0 Get BRX Info
const Instruction instr = {state.program_code[pos]};
const auto opcode = OpCode::Decode(instr);
if (opcode->get().GetId() != OpCode::Id::BRX) {
return std::nullopt;
}
if (instr.brx.constant_buffer != 0) {
return std::nullopt;
}
track_register = instr.gpr8.Value();
result.relative_position = instr.brx.GetBranchExtend();
pos--;
bool found_track = false;
// Step 1 Track LDC
while (pos >= shader_start) {
if (IsSchedInstruction(pos, shader_start)) {
pos--;
continue;
}
const Instruction instr = {state.program_code[pos]};
const auto opcode = OpCode::Decode(instr);
if (opcode->get().GetId() == OpCode::Id::LD_C) {
if (instr.gpr0.Value() == track_register &&
instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single) {
result.buffer = instr.cbuf36.index.Value();
result.offset = static_cast<u32>(instr.cbuf36.GetOffset());
track_register = instr.gpr8.Value();
pos--;
found_track = true;
break;
}
}
pos--;
}
if (!found_track) {
return std::nullopt;
}
found_track = false;
// Step 2 Track SHL
while (pos >= shader_start) {
if (IsSchedInstruction(pos, shader_start)) {
pos--;
continue;
}
const Instruction instr = state.program_code[pos];
const auto opcode = OpCode::Decode(instr);
if (opcode->get().GetId() == OpCode::Id::SHL_IMM) {
if (instr.gpr0.Value() == track_register) {
track_register = instr.gpr8.Value();
pos--;
found_track = true;
break;
}
}
pos--;
}
if (!found_track) {
return std::nullopt;
}
found_track = false;
// Step 3 Track IMNMX
while (pos >= shader_start) {
if (IsSchedInstruction(pos, shader_start)) {
pos--;
continue;
}
const Instruction instr = state.program_code[pos];
const auto opcode = OpCode::Decode(instr);
if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) {
if (instr.gpr0.Value() == track_register) {
track_register = instr.gpr8.Value();
result.entries = instr.alu.GetSignedImm20_20() + 1;
pos--;
found_track = true;
break;
}
}
pos--;
}
if (!found_track) {
return std::nullopt;
}
return result;
}
std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
u32 offset = static_cast<u32>(address);
const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction));
const u32 end_address = static_cast<u32>(state.program_code.size());
ParseInfo parse_info{};
SingleBranch single_branch{};
const auto insert_label = [](CFGRebuildState& state, u32 address) {
const auto pair = state.labels.emplace(address);
@ -140,13 +251,14 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
if (offset >= end_address) {
// ASSERT_OR_EXECUTE can't be used, as it ignores the break
ASSERT_MSG(false, "Shader passed the current limit!");
parse_info.branch_info.address = exit_branch;
parse_info.branch_info.ignore = false;
single_branch.address = exit_branch;
single_branch.ignore = false;
break;
}
if (state.registered.count(offset) != 0) {
parse_info.branch_info.address = offset;
parse_info.branch_info.ignore = true;
single_branch.address = offset;
single_branch.ignore = true;
break;
}
if (IsSchedInstruction(offset, state.start)) {
@ -163,24 +275,26 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
switch (opcode->get().GetId()) {
case OpCode::Id::EXIT: {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
parse_info.branch_info.condition.predicate =
GetPredicate(pred_index, instr.negate_pred != 0);
if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
parse_info.branch_info.condition.cc = cc;
single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
parse_info.branch_info.address = exit_branch;
parse_info.branch_info.kill = false;
parse_info.branch_info.is_sync = false;
parse_info.branch_info.is_brk = false;
parse_info.branch_info.ignore = false;
single_branch.address = exit_branch;
single_branch.kill = false;
single_branch.is_sync = false;
single_branch.is_brk = false;
single_branch.ignore = false;
parse_info.end_address = offset;
parse_info.branch_info = MakeBranchInfo<SingleBranch>(
single_branch.condition, single_branch.address, single_branch.kill,
single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
@ -189,99 +303,107 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
return {ParseResult::AbnormalFlow, parse_info};
}
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
parse_info.branch_info.condition.predicate =
GetPredicate(pred_index, instr.negate_pred != 0);
if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
parse_info.branch_info.condition.cc = cc;
single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
const u32 branch_offset = offset + instr.bra.GetBranchTarget();
if (branch_offset == 0) {
parse_info.branch_info.address = exit_branch;
single_branch.address = exit_branch;
} else {
parse_info.branch_info.address = branch_offset;
single_branch.address = branch_offset;
}
insert_label(state, branch_offset);
parse_info.branch_info.kill = false;
parse_info.branch_info.is_sync = false;
parse_info.branch_info.is_brk = false;
parse_info.branch_info.ignore = false;
single_branch.kill = false;
single_branch.is_sync = false;
single_branch.is_brk = false;
single_branch.ignore = false;
parse_info.end_address = offset;
parse_info.branch_info = MakeBranchInfo<SingleBranch>(
single_branch.condition, single_branch.address, single_branch.kill,
single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
case OpCode::Id::SYNC: {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
parse_info.branch_info.condition.predicate =
GetPredicate(pred_index, instr.negate_pred != 0);
if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
parse_info.branch_info.condition.cc = cc;
single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
parse_info.branch_info.address = unassigned_branch;
parse_info.branch_info.kill = false;
parse_info.branch_info.is_sync = true;
parse_info.branch_info.is_brk = false;
parse_info.branch_info.ignore = false;
single_branch.address = unassigned_branch;
single_branch.kill = false;
single_branch.is_sync = true;
single_branch.is_brk = false;
single_branch.ignore = false;
parse_info.end_address = offset;
parse_info.branch_info = MakeBranchInfo<SingleBranch>(
single_branch.condition, single_branch.address, single_branch.kill,
single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
case OpCode::Id::BRK: {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
parse_info.branch_info.condition.predicate =
GetPredicate(pred_index, instr.negate_pred != 0);
if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
parse_info.branch_info.condition.cc = cc;
single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
parse_info.branch_info.address = unassigned_branch;
parse_info.branch_info.kill = false;
parse_info.branch_info.is_sync = false;
parse_info.branch_info.is_brk = true;
parse_info.branch_info.ignore = false;
single_branch.address = unassigned_branch;
single_branch.kill = false;
single_branch.is_sync = false;
single_branch.is_brk = true;
single_branch.ignore = false;
parse_info.end_address = offset;
parse_info.branch_info = MakeBranchInfo<SingleBranch>(
single_branch.condition, single_branch.address, single_branch.kill,
single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
case OpCode::Id::KIL: {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
parse_info.branch_info.condition.predicate =
GetPredicate(pred_index, instr.negate_pred != 0);
if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
parse_info.branch_info.condition.cc = cc;
single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
parse_info.branch_info.address = exit_branch;
parse_info.branch_info.kill = true;
parse_info.branch_info.is_sync = false;
parse_info.branch_info.is_brk = false;
parse_info.branch_info.ignore = false;
single_branch.address = exit_branch;
single_branch.kill = true;
single_branch.is_sync = false;
single_branch.is_brk = false;
single_branch.ignore = false;
parse_info.end_address = offset;
parse_info.branch_info = MakeBranchInfo<SingleBranch>(
single_branch.condition, single_branch.address, single_branch.kill,
single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
@ -298,6 +420,29 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
break;
}
case OpCode::Id::BRX: {
auto tmp = TrackBranchIndirectInfo(state, address, offset);
if (tmp) {
auto result = *tmp;
std::vector<CaseBranch> branches{};
s32 pc_target = offset + result.relative_position;
for (u32 i = 0; i < result.entries; i++) {
auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
if (!k) {
return {ParseResult::AbnormalFlow, parse_info};
}
u32 value = *k;
u32 target = static_cast<u32>((value >> 3) + pc_target);
insert_label(state, target);
branches.emplace_back(value, target);
}
parse_info.end_address = offset;
parse_info.branch_info = MakeBranchInfo<MultiBranch>(
static_cast<u32>(instr.gpr8.Value()), std::move(branches));
return {ParseResult::ControlCaught, parse_info};
} else {
LOG_WARNING(HW_GPU, "BRX Track Unsuccesful");
}
return {ParseResult::AbnormalFlow, parse_info};
}
default:
@ -306,10 +451,13 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
offset++;
}
parse_info.branch_info.kill = false;
parse_info.branch_info.is_sync = false;
parse_info.branch_info.is_brk = false;
single_branch.kill = false;
single_branch.is_sync = false;
single_branch.is_brk = false;
parse_info.end_address = offset - 1;
parse_info.branch_info = MakeBranchInfo<SingleBranch>(
single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync,
single_branch.is_brk, single_branch.ignore);
return {ParseResult::BlockEnd, parse_info};
}
@ -333,9 +481,10 @@ bool TryInspectAddress(CFGRebuildState& state) {
BlockInfo& current_block = state.block_info[block_index];
current_block.end = address - 1;
new_block.branch = current_block.branch;
BlockBranchInfo forward_branch{};
forward_branch.address = address;
forward_branch.ignore = true;
BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>();
const auto branch = std::get_if<SingleBranch>(forward_branch.get());
branch->address = address;
branch->ignore = true;
current_block.branch = forward_branch;
return true;
}
@ -350,12 +499,15 @@ bool TryInspectAddress(CFGRebuildState& state) {
BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
block_info.branch = parse_info.branch_info;
if (parse_info.branch_info.condition.IsUnconditional()) {
if (std::holds_alternative<SingleBranch>(*block_info.branch)) {
const auto branch = std::get_if<SingleBranch>(block_info.branch.get());
if (branch->condition.IsUnconditional()) {
return true;
}
const u32 fallthrough_address = parse_info.end_address + 1;
state.inspect_queries.push_front(fallthrough_address);
return true;
}
const u32 fallthrough_address = parse_info.end_address + 1;
state.inspect_queries.push_front(fallthrough_address);
return true;
}
@ -393,31 +545,42 @@ bool TryQuery(CFGRebuildState& state) {
state.queries.pop_front();
gather_labels(q2.ssy_stack, state.ssy_labels, block);
gather_labels(q2.pbk_stack, state.pbk_labels, block);
if (!block.branch.condition.IsUnconditional()) {
q2.address = block.end + 1;
state.queries.push_back(q2);
}
if (std::holds_alternative<SingleBranch>(*block.branch)) {
const auto branch = std::get_if<SingleBranch>(block.branch.get());
if (!branch->condition.IsUnconditional()) {
q2.address = block.end + 1;
state.queries.push_back(q2);
}
Query conditional_query{q2};
if (block.branch.is_sync) {
if (block.branch.address == unassigned_branch) {
block.branch.address = conditional_query.ssy_stack.top();
Query conditional_query{q2};
if (branch->is_sync) {
if (branch->address == unassigned_branch) {
branch->address = conditional_query.ssy_stack.top();
}
conditional_query.ssy_stack.pop();
}
conditional_query.ssy_stack.pop();
}
if (block.branch.is_brk) {
if (block.branch.address == unassigned_branch) {
block.branch.address = conditional_query.pbk_stack.top();
if (branch->is_brk) {
if (branch->address == unassigned_branch) {
branch->address = conditional_query.pbk_stack.top();
}
conditional_query.pbk_stack.pop();
}
conditional_query.pbk_stack.pop();
conditional_query.address = branch->address;
state.queries.push_back(std::move(conditional_query));
return true;
}
const auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
for (const auto& branch_case : multi_branch->branches) {
Query conditional_query{q2};
conditional_query.address = branch_case.address;
state.queries.push_back(std::move(conditional_query));
}
conditional_query.address = block.branch.address;
state.queries.push_back(std::move(conditional_query));
return true;
}
} // Anonymous namespace
void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) {
void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
const auto get_expr = ([&](const Condition& cond) -> Expr {
Expr result{};
if (cond.cc != ConditionCode::T) {
@ -444,15 +607,24 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) {
}
return MakeExpr<ExprBoolean>(true);
});
if (branch.address < 0) {
if (branch.kill) {
mm.InsertReturn(get_expr(branch.condition), true);
if (std::holds_alternative<SingleBranch>(*branch_info)) {
const auto branch = std::get_if<SingleBranch>(branch_info.get());
if (branch->address < 0) {
if (branch->kill) {
mm.InsertReturn(get_expr(branch->condition), true);
return;
}
mm.InsertReturn(get_expr(branch->condition), false);
return;
}
mm.InsertReturn(get_expr(branch.condition), false);
mm.InsertGoto(get_expr(branch->condition), branch->address);
return;
}
mm.InsertGoto(get_expr(branch.condition), branch.address);
const auto multi_branch = std::get_if<MultiBranch>(branch_info.get());
for (const auto& branch_case : multi_branch->branches) {
mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value),
branch_case.address);
}
}
void DecompileShader(CFGRebuildState& state) {
@ -464,25 +636,26 @@ void DecompileShader(CFGRebuildState& state) {
if (state.labels.count(block.start) != 0) {
state.manager->InsertLabel(block.start);
}
u32 end = block.branch.ignore ? block.end + 1 : block.end;
const bool ignore = BlockBranchIsIgnored(block.branch);
u32 end = ignore ? block.end + 1 : block.end;
state.manager->InsertBlock(block.start, end);
if (!block.branch.ignore) {
if (!ignore) {
InsertBranch(*state.manager, block.branch);
}
}
state.manager->Decompile();
}
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
std::size_t program_size, u32 start_address,
const CompilerSettings& settings) {
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
const CompilerSettings& settings,
ConstBufferLocker& locker) {
auto result_out = std::make_unique<ShaderCharacteristics>();
if (settings.depth == CompileDepth::BruteForce) {
result_out->settings.depth = CompileDepth::BruteForce;
return result_out;
}
CFGRebuildState state{program_code, program_size, start_address};
CFGRebuildState state{program_code, start_address, locker};
// Inspect Code and generate blocks
state.labels.clear();
state.labels.emplace(start_address);
@ -547,11 +720,9 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
ShaderBlock new_block{};
new_block.start = block.start;
new_block.end = block.end;
new_block.ignore_branch = block.branch.ignore;
new_block.ignore_branch = BlockBranchIsIgnored(block.branch);
if (!new_block.ignore_branch) {
new_block.branch.cond = block.branch.condition;
new_block.branch.kills = block.branch.kill;
new_block.branch.address = block.branch.address;
new_block.branch = block.branch;
}
result_out->end = std::max(result_out->end, block.end);
result_out->blocks.push_back(new_block);

View File

@ -7,6 +7,7 @@
#include <list>
#include <optional>
#include <set>
#include <variant>
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/ast.h"
@ -37,29 +38,61 @@ struct Condition {
}
};
/// Branch data for a block that ends in at most one (possibly conditional) jump.
class SingleBranch {
public:
    SingleBranch() = default;

    /// Initializes every field from the matching argument.
    SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk,
                 bool ignore)
        : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk},
          ignore{ignore} {}

    /// Member-wise equality, evaluated in declaration order.
    bool operator==(const SingleBranch& b) const {
        return condition == b.condition && address == b.address && kill == b.kill &&
               is_sync == b.is_sync && is_brk == b.is_brk && ignore == b.ignore;
    }

    bool operator!=(const SingleBranch& b) const {
        return !(*this == b);
    }

    /// Condition guarding the branch.
    Condition condition{};
    /// Branch target; defaults to exit_branch.
    s32 address{exit_branch};
    bool kill{};
    bool is_sync{};
    bool is_brk{};
    bool ignore{};
};
/// One case of a MultiBranch: a value to compare against and the address to jump to.
struct CaseBranch {
CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {}
/// Value the branch register is compared against.
u32 cmp_value;
/// Jump target taken when the comparison matches.
u32 address;
};
/// Branch data for a block that ends in a jump with several possible targets, selected by
/// comparing a register against each case value.
class MultiBranch {
public:
/// Takes ownership of `branches` by moving from it.
MultiBranch(u32 gpr, std::vector<CaseBranch>&& branches)
: gpr{gpr}, branches{std::move(branches)} {}
/// Index of the register whose value selects the case.
u32 gpr{};
/// Candidate cases.
std::vector<CaseBranch> branches{};
};
using BranchData = std::variant<SingleBranch, MultiBranch>;
using BlockBranchInfo = std::shared_ptr<BranchData>;
bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second);
struct ShaderBlock {
struct Branch {
Condition cond{};
bool kills{};
s32 address{};
bool operator==(const Branch& b) const {
return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address);
}
bool operator!=(const Branch& b) const {
return !operator==(b);
}
};
u32 start{};
u32 end{};
bool ignore_branch{};
Branch branch{};
BlockBranchInfo branch{};
bool operator==(const ShaderBlock& sb) const {
return std::tie(start, end, ignore_branch, branch) ==
std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch);
return std::tie(start, end, ignore_branch) ==
std::tie(sb.start, sb.end, sb.ignore_branch) &&
BlockBranchInfoAreEqual(branch, sb.branch);
}
bool operator!=(const ShaderBlock& sb) const {
@ -76,8 +109,8 @@ struct ShaderCharacteristics {
CompilerSettings settings{};
};
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
std::size_t program_size, u32 start_address,
const CompilerSettings& settings);
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
const CompilerSettings& settings,
ConstBufferLocker& locker);
} // namespace VideoCommon::Shader

View File

@ -33,7 +33,7 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
return (absolute_offset % SchedPeriod) == 0;
}
} // namespace
} // Anonymous namespace
class ASTDecoder {
public:
@ -102,7 +102,7 @@ void ShaderIR::Decode() {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
decompiled = false;
auto info = ScanFlow(program_code, program_size, main_offset, settings);
auto info = ScanFlow(program_code, main_offset, settings, locker);
auto& shader_info = *info;
coverage_begin = shader_info.start;
coverage_end = shader_info.end;
@ -155,7 +155,7 @@ void ShaderIR::Decode() {
[[fallthrough]];
case CompileDepth::BruteForce: {
coverage_begin = main_offset;
const u32 shader_end = static_cast<u32>(program_size / sizeof(u64));
const std::size_t shader_end = program_code.size();
coverage_end = shader_end;
for (u32 label = main_offset; label < shader_end; label++) {
basic_blocks.insert({label, DecodeRange(label, label + 1)});
@ -198,24 +198,39 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
}
return result;
};
if (block.branch.address < 0) {
if (block.branch.kills) {
Node n = Operation(OperationCode::Discard);
n = apply_conditions(block.branch.cond, n);
if (std::holds_alternative<SingleBranch>(*block.branch)) {
auto branch = std::get_if<SingleBranch>(block.branch.get());
if (branch->address < 0) {
if (branch->kill) {
Node n = Operation(OperationCode::Discard);
n = apply_conditions(branch->condition, n);
bb.push_back(n);
global_code.push_back(n);
return;
}
Node n = Operation(OperationCode::Exit);
n = apply_conditions(branch->condition, n);
bb.push_back(n);
global_code.push_back(n);
return;
}
Node n = Operation(OperationCode::Exit);
n = apply_conditions(block.branch.cond, n);
Node n = Operation(OperationCode::Branch, Immediate(branch->address));
n = apply_conditions(branch->condition, n);
bb.push_back(n);
global_code.push_back(n);
return;
}
Node n = Operation(OperationCode::Branch, Immediate(block.branch.address));
n = apply_conditions(block.branch.cond, n);
bb.push_back(n);
global_code.push_back(n);
auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
Node op_a = GetRegister(multi_branch->gpr);
for (auto& branch_case : multi_branch->branches) {
Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
Node op_b = Immediate(branch_case.cmp_value);
Node condition =
GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b);
auto result = Conditional(condition, {n});
bb.push_back(result);
global_code.push_back(result);
}
}
u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {

View File

@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
const auto& sampler =
GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}});
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
@ -165,10 +165,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
const auto& sampler =
is_bindless
? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false,
false)
: GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {});
u32 indexer = 0;
switch (instr.txq.query_type) {
@ -207,9 +204,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
const auto& sampler = is_bindless
? GetBindlessSampler(instr.gpr20, texture_type, is_array, false)
: GetSampler(instr.sampler, texture_type, is_array, false);
const auto& sampler =
is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}})
: GetSampler(instr.sampler, {{texture_type, is_array, false}});
std::vector<Node> coords;
@ -285,9 +282,26 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
return pc;
}
const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
bool is_array, bool is_shadow) {
const auto offset = static_cast<std::size_t>(sampler.index.Value());
const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
std::optional<SamplerInfo> sampler_info) {
const auto offset = static_cast<u32>(sampler.index.Value());
Tegra::Shader::TextureType type;
bool is_array;
bool is_shadow;
if (sampler_info) {
type = sampler_info->type;
is_array = sampler_info->is_array;
is_shadow = sampler_info->is_shadow;
} else if (auto sampler = locker.ObtainBoundSampler(offset); sampler) {
type = sampler->texture_type.Value();
is_array = sampler->is_array.Value() != 0;
is_shadow = sampler->is_shadow.Value() != 0;
} else {
type = Tegra::Shader::TextureType::Texture2D;
is_array = false;
is_shadow = false;
}
// If this sampler has already been used, return the existing mapping.
const auto itr =
@ -303,15 +317,31 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
const std::size_t next_index = used_samplers.size();
const Sampler entry{offset, next_index, type, is_array, is_shadow};
return *used_samplers.emplace(entry).first;
}
} // namespace VideoCommon::Shader
const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
bool is_array, bool is_shadow) {
const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
std::optional<SamplerInfo> sampler_info) {
const Node sampler_register = GetRegister(reg);
const auto [base_sampler, cbuf_index, cbuf_offset] =
TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
ASSERT(base_sampler != nullptr);
const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
Tegra::Shader::TextureType type;
bool is_array;
bool is_shadow;
if (sampler_info) {
type = sampler_info->type;
is_array = sampler_info->is_array;
is_shadow = sampler_info->is_shadow;
} else if (auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); sampler) {
type = sampler->texture_type.Value();
is_array = sampler->is_array.Value() != 0;
is_shadow = sampler->is_shadow.Value() != 0;
} else {
type = Tegra::Shader::TextureType::Texture2D;
is_array = false;
is_shadow = false;
}
// If this sampler has already been used, return the existing mapping.
const auto itr =
@ -411,9 +441,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
(texture_type == TextureType::TextureCube && is_array && is_shadow),
"This method is not supported.");
const auto& sampler = is_bindless
? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow)
: GetSampler(instr.sampler, texture_type, is_array, is_shadow);
const auto& sampler =
is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}})
: GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}});
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
process_mode == TextureProcessMode::LL ||
@ -577,7 +607,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
dc = GetRegister(parameter_register++);
}
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}});
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
@ -610,7 +640,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
// const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
// const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
@ -646,7 +676,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
// When lod is used, it is always in gpr20
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {

View File

@ -17,13 +17,14 @@ using Tegra::Shader::Pred;
class ExprAnd;
class ExprBoolean;
class ExprCondCode;
class ExprGprEqual;
class ExprNot;
class ExprOr;
class ExprPredicate;
class ExprVar;
using ExprData =
std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, ExprBoolean>;
using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd,
ExprBoolean, ExprGprEqual>;
using Expr = std::shared_ptr<ExprData>;
class ExprAnd final {
@ -118,6 +119,22 @@ public:
bool value;
};
/// Boolean expression that holds when the general purpose register `gpr` equals `value`.
class ExprGprEqual final {
public:
    ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {}

    /// Two expressions are equal only when both the register index and the compared value match.
    bool operator==(const ExprGprEqual& b) const {
        if (gpr != b.gpr) {
            return false;
        }
        return value == b.value;
    }

    /// Negation of operator==.
    bool operator!=(const ExprGprEqual& b) const {
        return !(*this == b);
    }

    u32 gpr;   ///< Index of the register being compared.
    u32 value; ///< Value the register is compared against.
};
template <typename T, typename... Args>
Expr MakeExpr(Args&&... args) {
static_assert(std::is_convertible_v<T, ExprData>);

View File

@ -23,10 +23,9 @@ using Tegra::Shader::PredCondition;
using Tegra::Shader::PredOperation;
using Tegra::Shader::Register;
ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size,
CompilerSettings settings)
: program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{},
program_manager{true, true}, settings{settings} {
/// Builds the shader IR and decodes the program right away.
/// @param program_code Shader code to decode; stored as a reference member, not copied.
/// @param main_offset  Offset of the program's entry point.
/// @param settings     Compiler settings, copied into the instance.
/// @param locker       Const buffer locker; stored as a reference member, so it has to outlive
///                     this object.
ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
ConstBufferLocker& locker)
: program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
// Decoding happens as part of construction.
Decode();
}

View File

@ -17,6 +17,7 @@
#include "video_core/engines/shader_header.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/compiler_settings.h"
#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/node.h"
namespace VideoCommon::Shader {
@ -66,8 +67,8 @@ struct GlobalMemoryUsage {
class ShaderIR final {
public:
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size,
CompilerSettings settings);
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
ConstBufferLocker& locker);
~ShaderIR();
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@ -172,6 +173,13 @@ public:
private:
friend class ASTDecoder;
/// Sampler properties that a texture instruction can supply when requesting a sampler.
/// The default values mirror the fallback GetSampler/GetBindlessSampler use when nothing else is
/// known (2D texture, not an array, not a shadow sampler), so a default-constructed instance
/// never carries indeterminate values. The struct remains an aggregate, so brace initialization
/// with {type, is_array, is_shadow} keeps working.
struct SamplerInfo {
    Tegra::Shader::TextureType type{Tegra::Shader::TextureType::Texture2D};
    bool is_array{};
    bool is_shadow{};
};
void Decode();
NodeBlock DecodeRange(u32 begin, u32 end);
@ -296,12 +304,11 @@ private:
/// Accesses a texture sampler
const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
Tegra::Shader::TextureType type, bool is_array, bool is_shadow);
std::optional<SamplerInfo> sampler_info);
/// Accesses a texture sampler for a bindless texture.
const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg,
Tegra::Shader::TextureType type, bool is_array,
bool is_shadow);
std::optional<SamplerInfo> sampler_info);
/// Accesses an image.
Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
@ -377,7 +384,9 @@ private:
const ProgramCode& program_code;
const u32 main_offset;
const std::size_t program_size;
const CompilerSettings settings;
ConstBufferLocker& locker;
bool decompiled{};
bool disable_flow_stack{};
@ -386,8 +395,7 @@ private:
std::map<u32, NodeBlock> basic_blocks;
NodeBlock global_code;
ASTManager program_manager;
CompilerSettings settings{};
ASTManager program_manager{true, true};
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;