Merge pull request #9401 from vonchenplus/draw_manager

video_core: Implement maxwell3d draw manager and split draw logic
This commit is contained in:
Fernando S
2022-12-08 12:41:39 +01:00
committed by GitHub
12 changed files with 341 additions and 267 deletions

View File

@ -0,0 +1,191 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/dirty_flags.h"
#include "video_core/engines/draw_manager.h"
#include "video_core/rasterizer_interface.h"
namespace Tegra::Engines {
DrawManager::DrawManager(Maxwell3D* maxwell3d_) : maxwell3d(maxwell3d_) {}
void DrawManager::ProcessMethodCall(u32 method, u32 argument) {
const auto& regs{maxwell3d->regs};
switch (method) {
case MAXWELL3D_REG_INDEX(clear_surface):
return Clear(1);
case MAXWELL3D_REG_INDEX(draw.begin):
return DrawBegin();
case MAXWELL3D_REG_INDEX(draw.end):
return DrawEnd();
case MAXWELL3D_REG_INDEX(vertex_buffer.first):
case MAXWELL3D_REG_INDEX(vertex_buffer.count):
case MAXWELL3D_REG_INDEX(index_buffer.first):
break;
case MAXWELL3D_REG_INDEX(index_buffer.count):
draw_state.draw_indexed = true;
break;
case MAXWELL3D_REG_INDEX(index_buffer32_subsequent):
case MAXWELL3D_REG_INDEX(index_buffer16_subsequent):
case MAXWELL3D_REG_INDEX(index_buffer8_subsequent):
draw_state.instance_count++;
[[fallthrough]];
case MAXWELL3D_REG_INDEX(index_buffer32_first):
case MAXWELL3D_REG_INDEX(index_buffer16_first):
case MAXWELL3D_REG_INDEX(index_buffer8_first):
return DrawIndexSmall(argument);
case MAXWELL3D_REG_INDEX(draw_inline_index):
SetInlineIndexBuffer(argument);
break;
case MAXWELL3D_REG_INDEX(inline_index_2x16.even):
SetInlineIndexBuffer(regs.inline_index_2x16.even);
SetInlineIndexBuffer(regs.inline_index_2x16.odd);
break;
case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
SetInlineIndexBuffer(regs.inline_index_4x8.index0);
SetInlineIndexBuffer(regs.inline_index_4x8.index1);
SetInlineIndexBuffer(regs.inline_index_4x8.index2);
SetInlineIndexBuffer(regs.inline_index_4x8.index3);
break;
case MAXWELL3D_REG_INDEX(topology_override):
use_topology_override = true;
break;
default:
break;
}
}
void DrawManager::Clear(u32 layer_count) {
maxwell3d->rasterizer->Clear(layer_count);
}
void DrawManager::DrawDeferred() {
if (draw_state.draw_mode != DrawMode::Instance || draw_state.instance_count == 0)
return;
DrawEnd(draw_state.instance_count + 1, true);
draw_state.instance_count = 0;
}
void DrawManager::DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count,
u32 base_instance, u32 num_instances) {
draw_state.topology = topology;
draw_state.vertex_buffer.first = vertex_first;
draw_state.vertex_buffer.count = vertex_count;
draw_state.base_instance = base_instance;
ProcessDraw(false, num_instances);
}
void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count,
u32 base_index, u32 base_instance, u32 num_instances) {
const auto& regs{maxwell3d->regs};
draw_state.topology = topology;
draw_state.index_buffer = regs.index_buffer;
draw_state.index_buffer.first = index_first;
draw_state.index_buffer.count = index_count;
draw_state.base_index = base_index;
draw_state.base_instance = base_instance;
ProcessDraw(true, num_instances);
}
void DrawManager::SetInlineIndexBuffer(u32 index) {
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>(index & 0x000000ff));
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x0000ff00) >> 8));
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x00ff0000) >> 16));
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0xff000000) >> 24));
draw_state.draw_mode = DrawMode::InlineIndex;
}
void DrawManager::DrawBegin() {
const auto& regs{maxwell3d->regs};
auto reset_instance_count = regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First;
auto increment_instance_count =
regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent;
if (reset_instance_count) {
DrawDeferred();
draw_state.instance_count = 0;
draw_state.draw_mode = DrawMode::General;
} else if (increment_instance_count) {
draw_state.instance_count++;
draw_state.draw_mode = DrawMode::Instance;
}
draw_state.topology = regs.draw.topology;
}
void DrawManager::DrawEnd(u32 instance_count, bool force_draw) {
const auto& regs{maxwell3d->regs};
switch (draw_state.draw_mode) {
case DrawMode::Instance:
if (!force_draw)
break;
[[fallthrough]];
case DrawMode::General:
draw_state.base_instance = regs.global_base_instance_index;
draw_state.base_index = regs.global_base_vertex_index;
if (draw_state.draw_indexed) {
draw_state.index_buffer = regs.index_buffer;
ProcessDraw(true, instance_count);
} else {
draw_state.vertex_buffer = regs.vertex_buffer;
ProcessDraw(false, instance_count);
}
draw_state.draw_indexed = false;
break;
case DrawMode::InlineIndex:
draw_state.base_instance = regs.global_base_instance_index;
draw_state.base_index = regs.global_base_vertex_index;
draw_state.index_buffer = regs.index_buffer;
draw_state.index_buffer.count =
static_cast<u32>(draw_state.inline_index_draw_indexes.size() / 4);
draw_state.index_buffer.format = Maxwell3D::Regs::IndexFormat::UnsignedInt;
ProcessDraw(true, instance_count);
draw_state.inline_index_draw_indexes.clear();
break;
}
}
void DrawManager::DrawIndexSmall(u32 argument) {
const auto& regs{maxwell3d->regs};
IndexBufferSmall index_small_params{argument};
draw_state.base_instance = regs.global_base_instance_index;
draw_state.base_index = regs.global_base_vertex_index;
draw_state.index_buffer = regs.index_buffer;
draw_state.index_buffer.first = index_small_params.first;
draw_state.index_buffer.count = index_small_params.count;
draw_state.topology = index_small_params.topology;
maxwell3d->dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
ProcessDraw(true, 1);
}
void DrawManager::ProcessTopologyOverride() {
if (!use_topology_override)
return;
const auto& regs{maxwell3d->regs};
switch (regs.topology_override) {
case PrimitiveTopologyOverride::None:
break;
case PrimitiveTopologyOverride::Points:
draw_state.topology = PrimitiveTopology::Points;
break;
case PrimitiveTopologyOverride::Lines:
draw_state.topology = PrimitiveTopology::Lines;
break;
case PrimitiveTopologyOverride::LineStrip:
draw_state.topology = PrimitiveTopology::LineStrip;
break;
default:
draw_state.topology = static_cast<PrimitiveTopology>(regs.topology_override);
break;
}
}
void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology.Value(),
draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count);
ProcessTopologyOverride();
if (maxwell3d->ShouldExecute())
maxwell3d->rasterizer->Draw(draw_indexed, instance_count);
}
} // namespace Tegra::Engines

View File

@ -0,0 +1,69 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra::Engines {
using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology;
using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride;
using IndexBuffer = Maxwell3D::Regs::IndexBuffer;
using VertexBuffer = Maxwell3D::Regs::VertexBuffer;
using IndexBufferSmall = Maxwell3D::Regs::IndexBufferSmall;
class DrawManager {
public:
enum class DrawMode : u32 { General = 0, Instance, InlineIndex };
struct State {
PrimitiveTopology topology{};
DrawMode draw_mode{};
bool draw_indexed{};
u32 base_index{};
VertexBuffer vertex_buffer;
IndexBuffer index_buffer;
u32 base_instance{};
u32 instance_count{};
std::vector<u8> inline_index_draw_indexes;
};
explicit DrawManager(Maxwell3D* maxwell_3d);
void ProcessMethodCall(u32 method, u32 argument);
void Clear(u32 layer_count);
void DrawDeferred();
void DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count,
u32 base_instance, u32 num_instances);
void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index,
u32 base_instance, u32 num_instances);
const State& GetDrawState() const {
return draw_state;
}
private:
void SetInlineIndexBuffer(u32 index);
void DrawBegin();
void DrawEnd(u32 instance_count = 1, bool force_draw = false);
void DrawIndexSmall(u32 argument);
void ProcessTopologyOverride();
void ProcessDraw(bool draw_indexed, u32 instance_count);
Maxwell3D* maxwell3d{};
State draw_state{};
bool use_topology_override{};
};
} // namespace Tegra::Engines

View File

@ -7,6 +7,7 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/draw_manager.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
@ -21,8 +22,10 @@ using VideoCore::QueryType;
constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
: system{system_}, memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)},
upload_state{memory_manager, regs.upload} {
: draw_manager{std::make_unique<DrawManager>(this)}, system{system_},
memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, upload_state{
memory_manager,
regs.upload} {
dirty.flags.flip();
InitializeRegisterDefaults();
}
@ -116,16 +119,6 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill;
shadow_state = regs;
draw_command[MAXWELL3D_REG_INDEX(draw.end)] = true;
draw_command[MAXWELL3D_REG_INDEX(draw.begin)] = true;
draw_command[MAXWELL3D_REG_INDEX(vertex_buffer.first)] = true;
draw_command[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true;
draw_command[MAXWELL3D_REG_INDEX(index_buffer.first)] = true;
draw_command[MAXWELL3D_REG_INDEX(index_buffer.count)] = true;
draw_command[MAXWELL3D_REG_INDEX(draw_inline_index)] = true;
draw_command[MAXWELL3D_REG_INDEX(inline_index_2x16.even)] = true;
draw_command[MAXWELL3D_REG_INDEX(inline_index_4x8.index0)] = true;
}
void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {
@ -213,29 +206,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
return ProcessCBBind(3);
case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config):
return ProcessCBBind(4);
case MAXWELL3D_REG_INDEX(index_buffer32_first):
regs.index_buffer.count = regs.index_buffer32_first.count;
regs.index_buffer.first = regs.index_buffer32_first.first;
dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
draw_indexed = true;
return ProcessDraw();
case MAXWELL3D_REG_INDEX(index_buffer16_first):
regs.index_buffer.count = regs.index_buffer16_first.count;
regs.index_buffer.first = regs.index_buffer16_first.first;
dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
draw_indexed = true;
return ProcessDraw();
case MAXWELL3D_REG_INDEX(index_buffer8_first):
regs.index_buffer.count = regs.index_buffer8_first.count;
regs.index_buffer.first = regs.index_buffer8_first.first;
dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
draw_indexed = true;
return ProcessDraw();
case MAXWELL3D_REG_INDEX(topology_override):
use_topology_override = true;
return;
case MAXWELL3D_REG_INDEX(clear_surface):
return ProcessClearBuffers(1);
case MAXWELL3D_REG_INDEX(report_semaphore.query):
return ProcessQueryGet();
case MAXWELL3D_REG_INDEX(render_enable.mode):
@ -254,6 +224,9 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
return rasterizer->FragmentBarrier();
case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
return rasterizer->TiledCacheBarrier();
default:
draw_manager->ProcessMethodCall(method, argument);
break;
}
}
@ -268,7 +241,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
// Execute the current macro.
macro_engine->Execute(macro_positions[entry], parameters);
ProcessDeferredDraw();
draw_manager->DrawDeferred();
}
void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
@ -291,62 +264,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
const u32 argument = ProcessShadowRam(method, method_argument);
ProcessDirtyRegisters(method, argument);
if (draw_command[method]) {
regs.reg_array[method] = method_argument;
deferred_draw_method.push_back(method);
auto update_inline_index = [&](const u32 index) {
inline_index_draw_indexes.push_back(static_cast<u8>(index & 0x000000ff));
inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x0000ff00) >> 8));
inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x00ff0000) >> 16));
inline_index_draw_indexes.push_back(static_cast<u8>((index & 0xff000000) >> 24));
draw_mode = DrawMode::InlineIndex;
};
switch (method) {
case MAXWELL3D_REG_INDEX(draw.begin): {
draw_mode =
(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) ||
(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged)
? DrawMode::Instance
: DrawMode::General;
break;
}
case MAXWELL3D_REG_INDEX(draw.end):
switch (draw_mode) {
case DrawMode::General:
ProcessDraw();
break;
case DrawMode::InlineIndex:
regs.index_buffer.count = static_cast<u32>(inline_index_draw_indexes.size() / 4);
regs.index_buffer.format = Regs::IndexFormat::UnsignedInt;
draw_indexed = true;
ProcessDraw();
inline_index_draw_indexes.clear();
break;
case DrawMode::Instance:
break;
}
break;
case MAXWELL3D_REG_INDEX(index_buffer.count):
draw_indexed = true;
break;
case MAXWELL3D_REG_INDEX(draw_inline_index):
update_inline_index(method_argument);
break;
case MAXWELL3D_REG_INDEX(inline_index_2x16.even):
update_inline_index(regs.inline_index_2x16.even);
update_inline_index(regs.inline_index_2x16.odd);
break;
case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
update_inline_index(regs.inline_index_4x8.index0);
update_inline_index(regs.inline_index_4x8.index1);
update_inline_index(regs.inline_index_4x8.index2);
update_inline_index(regs.inline_index_4x8.index3);
break;
}
} else {
ProcessDeferredDraw();
ProcessMethodCall(method, argument, method_argument, is_last_call);
}
ProcessMethodCall(method, argument, method_argument, is_last_call);
}
void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
@ -387,35 +305,6 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
}
}
void Maxwell3D::ProcessTopologyOverride() {
using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology;
using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride;
PrimitiveTopology topology{};
switch (regs.topology_override) {
case PrimitiveTopologyOverride::None:
topology = regs.draw.topology;
break;
case PrimitiveTopologyOverride::Points:
topology = PrimitiveTopology::Points;
break;
case PrimitiveTopologyOverride::Lines:
topology = PrimitiveTopology::Lines;
break;
case PrimitiveTopologyOverride::LineStrip:
topology = PrimitiveTopology::LineStrip;
break;
default:
topology = static_cast<PrimitiveTopology>(regs.topology_override);
break;
}
if (use_topology_override) {
regs.draw.topology.Assign(topology);
}
}
void Maxwell3D::ProcessMacroUpload(u32 data) {
macro_engine->AddCode(regs.load_mme.instruction_ptr++, data);
}
@ -635,44 +524,4 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
return regs.reg_array[method];
}
void Maxwell3D::ProcessClearBuffers(u32 layer_count) {
rasterizer->Clear(layer_count);
}
void Maxwell3D::ProcessDraw(u32 instance_count) {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
draw_indexed ? regs.index_buffer.count : regs.vertex_buffer.count);
ProcessTopologyOverride();
if (ShouldExecute()) {
rasterizer->Draw(draw_indexed, instance_count);
}
draw_indexed = false;
deferred_draw_method.clear();
}
void Maxwell3D::ProcessDeferredDraw() {
if (draw_mode != DrawMode::Instance || deferred_draw_method.empty()) {
return;
}
const auto method_count = deferred_draw_method.size();
u32 instance_count = 1;
u32 vertex_buffer_count = 0;
u32 index_buffer_count = 0;
for (size_t index = 0; index < method_count; ++index) {
const u32 method = deferred_draw_method[index];
if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count)) {
instance_count = ++vertex_buffer_count;
} else if (method == MAXWELL3D_REG_INDEX(index_buffer.count)) {
instance_count = ++index_buffer_count;
}
}
ASSERT_MSG(!(vertex_buffer_count && index_buffer_count), "Instance both indexed and direct?");
ProcessDraw(instance_count);
}
} // namespace Tegra::Engines

View File

@ -37,6 +37,8 @@ class RasterizerInterface;
namespace Tegra::Engines {
class DrawManager;
/**
* This Engine is known as GF100_3D. Documentation can be found in:
* https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/3d/clb197.h
@ -2223,6 +2225,7 @@ public:
struct IndexBufferSmall {
union {
u32 raw;
BitField<0, 16, u32> first;
BitField<16, 12, u32> count;
BitField<28, 4, PrimitiveTopology> topology;
@ -3061,10 +3064,8 @@ public:
Tables tables{};
} dirty;
std::vector<u8> inline_index_draw_indexes;
/// Handles a write to the CLEAR_BUFFERS register.
void ProcessClearBuffers(u32 layer_count);
std::unique_ptr<DrawManager> draw_manager;
friend class DrawManager;
private:
void InitializeRegisterDefaults();
@ -3122,15 +3123,6 @@ private:
/// Handles a write to the CB_BIND register.
void ProcessCBBind(size_t stage_index);
/// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro)
void ProcessTopologyOverride();
/// Handles deferred draw(e.g., instance draw).
void ProcessDeferredDraw();
/// Handles a draw.
void ProcessDraw(u32 instance_count = 1);
/// Returns a query's value or an empty object if the value will be deferred through a cache.
std::optional<u64> GetQueryResult();
@ -3153,13 +3145,6 @@ private:
Upload::State upload_state;
bool execute_on{true};
bool use_topology_override{false};
std::array<bool, Regs::NUM_REGS> draw_command{};
std::vector<u32> deferred_draw_method;
enum class DrawMode : u32 { General = 0, Instance, InlineIndex };
DrawMode draw_mode{DrawMode::General};
bool draw_indexed{};
};
#define ASSERT_REG_POSITION(field_name, position) \