Merge pull request #2735 from FernandoS27/pipeline-rework

Rework Dirty Flags in GPU Pipeline, Optimize CBData and Redo Clearing mechanism
This commit is contained in:
bunnei
2019-07-21 00:59:52 -04:00
committed by GitHub
14 changed files with 528 additions and 116 deletions

View File

@ -105,6 +105,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
state.Apply();
clear_framebuffer.Create();
LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
CheckExtensions();
@ -124,10 +125,10 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
if (!gpu.dirty_flags.vertex_attrib_format) {
if (!gpu.dirty.vertex_attrib_format) {
return state.draw.vertex_array;
}
gpu.dirty_flags.vertex_attrib_format = false;
gpu.dirty.vertex_attrib_format = false;
MICROPROFILE_SCOPE(OpenGL_VAO);
@ -181,7 +182,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
}
// Rebinding the VAO invalidates the vertex buffer bindings.
gpu.dirty_flags.vertex_array.set();
gpu.dirty.ResetVertexArrays();
state.draw.vertex_array = vao_entry.handle;
return vao_entry.handle;
@ -189,17 +190,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
if (gpu.dirty_flags.vertex_array.none())
if (!gpu.dirty.vertex_array_buffers)
return;
gpu.dirty.vertex_array_buffers = false;
const auto& regs = gpu.regs;
MICROPROFILE_SCOPE(OpenGL_VB);
// Upload all guest vertex arrays sequentially to our buffer
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
if (!gpu.dirty_flags.vertex_array[index])
if (!gpu.dirty.vertex_array[index])
continue;
gpu.dirty.vertex_array[index] = false;
gpu.dirty.vertex_instance[index] = false;
const auto& vertex_array = regs.vertex_array[index];
if (!vertex_array.IsEnabled())
@ -224,8 +228,32 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
glVertexArrayBindingDivisor(vao, index, 0);
}
}
}
gpu.dirty_flags.vertex_array.reset();
/// Refreshes the per-binding instancing divisor of `vao` for every vertex array whose
/// `dirty.vertex_instance` flag is raised, clearing each flag as it is handled.
/// Returns immediately when the aggregate `dirty.vertex_instances` flag is not set.
/// @param vao Vertex array object whose binding divisors are updated.
void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
    auto& maxwell3d = system.GPU().Maxwell3D();
    if (!maxwell3d.dirty.vertex_instances) {
        return;
    }
    maxwell3d.dirty.vertex_instances = false;

    const auto& regs = maxwell3d.regs;
    for (u32 binding = 0; binding < Maxwell::NumVertexArrays; ++binding) {
        if (!maxwell3d.dirty.vertex_instance[binding]) {
            continue;
        }
        maxwell3d.dirty.vertex_instance[binding] = false;

        // A divisor of zero disables instancing for this binding; a non-zero divisor is only
        // forwarded when instancing is enabled for the array.
        GLuint divisor = 0;
        if (regs.instanced_arrays.IsInstancingEnabled(binding) &&
            regs.vertex_array[binding].divisor != 0) {
            divisor = regs.vertex_array[binding].divisor;
        }
        glVertexArrayBindingDivisor(vao, binding, divisor);
    }
}
GLintptr RasterizerOpenGL::SetupIndexBuffer() {
@ -341,7 +369,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
SyncClipEnabled(clip_distances);
gpu.dirty_flags.shaders = false;
gpu.dirty.shaders = false;
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@ -424,13 +452,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
single_color_target};
if (fb_config_state == current_framebuffer_config_state &&
gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) {
// Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
// single color targets). This is done because the guest registers may not change but the
// host framebuffer may contain different attachments
return current_depth_stencil_usage;
}
gpu.dirty.render_settings = false;
current_framebuffer_config_state = fb_config_state;
texture_cache.GuardRenderTargets(true);
@ -519,13 +547,65 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
}
/// Binds `clear_framebuffer` as the draw framebuffer of `current_state` and attaches the
/// surfaces a clear operation needs, detaching every attachment point that is not used.
/// @param current_state    State object whose draw framebuffer is switched and applied.
/// @param using_color_fb   Fetch and attach the color target selected by `clear_buffers.RT`.
/// @param using_depth_fb   Fetch and attach the depth buffer surface.
/// @param using_stencil_fb Fetch and attach the depth buffer surface for its stencil aspect.
void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
                                                 bool using_depth_fb, bool using_stencil_fb) {
    const auto& regs = system.GPU().Maxwell3D().regs;

    texture_cache.GuardRenderTargets(true);
    View color_surface{};
    if (using_color_fb) {
        color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false);
    }
    View depth_surface{};
    if (using_depth_fb || using_stencil_fb) {
        depth_surface = texture_cache.GetDepthBufferSurface(false);
    }
    texture_cache.GuardRenderTargets(false);

    current_state.draw.draw_framebuffer = clear_framebuffer.handle;
    current_state.ApplyFramebufferState();

    if (color_surface) {
        color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
    } else {
        // Texture name 0 detaches whatever was previously bound to this attachment point.
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
    }

    if (depth_surface) {
        const auto& params = depth_surface->GetSurfaceParams();
        switch (params.type) {
        case VideoCore::Surface::SurfaceType::Depth: {
            depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
            // Depth-only surface: make sure no stale stencil attachment is left behind.
            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
            break;
        }
        case VideoCore::Surface::SurfaceType::DepthStencil: {
            // Use the combined attachment point so the stencil aspect is bound as well. Attaching
            // only GL_DEPTH_ATTACHMENT would leave the stencil attachment of this reused
            // framebuffer in whatever state an earlier call left it.
            // NOTE(review): assumes Attach() forwards the attachment enum unchanged — confirm.
            depth_surface->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
            break;
        }
        default: {
            UNIMPLEMENTED();
            break;
        }
        }
    } else {
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
                               0);
    }
}
void RasterizerOpenGL::Clear() {
const auto& regs = system.GPU().Maxwell3D().regs;
bool use_color{};
bool use_depth{};
bool use_stencil{};
OpenGLState clear_state;
OpenGLState prev_state{OpenGLState::GetCurState()};
SCOPE_EXIT({
prev_state.AllDirty();
prev_state.Apply();
});
OpenGLState clear_state{OpenGLState::GetCurState()};
clear_state.SetDefaultViewports();
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A) {
use_color = true;
@ -545,6 +625,7 @@ void RasterizerOpenGL::Clear() {
// true.
clear_state.depth.test_enabled = true;
clear_state.depth.test_func = GL_ALWAYS;
clear_state.depth.write_mask = GL_TRUE;
}
if (regs.clear_buffers.S) {
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
@ -581,8 +662,9 @@ void RasterizerOpenGL::Clear() {
return;
}
const auto [clear_depth, clear_stencil] = ConfigureFramebuffers(
clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value());
ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
SyncViewport(clear_state);
if (regs.clear_flags.scissor) {
SyncScissorTest(clear_state);
}
@ -591,21 +673,18 @@ void RasterizerOpenGL::Clear() {
clear_state.EmulateViewportWithScissor();
}
clear_state.ApplyColorMask();
clear_state.ApplyDepth();
clear_state.ApplyStencilTest();
clear_state.ApplyViewport();
clear_state.ApplyFramebufferState();
clear_state.AllDirty();
clear_state.Apply();
if (use_color) {
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
glClearBufferfv(GL_COLOR, 0, regs.clear_color);
}
if (clear_depth && clear_stencil) {
if (use_depth && use_stencil) {
glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
} else if (clear_depth) {
} else if (use_depth) {
glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
} else if (clear_stencil) {
} else if (use_stencil) {
glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
}
}
@ -661,6 +740,7 @@ void RasterizerOpenGL::DrawArrays() {
// Upload vertex and index data.
SetupVertexBuffer(vao);
SetupVertexInstances(vao);
const GLintptr index_buffer_offset = SetupIndexBuffer();
// Setup draw parameters. It will automatically choose what glDraw* method to use.
@ -687,7 +767,7 @@ void RasterizerOpenGL::DrawArrays() {
if (invalidate) {
// As all cached buffers are invalidated, we need to recheck their state.
gpu.dirty_flags.vertex_array.set();
gpu.dirty.ResetVertexArrays();
}
shader_program_manager->ApplyTo(state);
@ -700,6 +780,7 @@ void RasterizerOpenGL::DrawArrays() {
params.DispatchDraw();
accelerate_draw = AccelDraw::Disabled;
gpu.dirty.memory_general = false;
}
/// Empty body: this implementation performs no work when called.
void RasterizerOpenGL::FlushAll() {}
@ -907,10 +988,11 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
const auto& regs = system.GPU().Maxwell3D().regs;
auto& maxwell3d = system.GPU().Maxwell3D();
const auto& regs = maxwell3d.regs;
state.cull.enabled = regs.cull.enabled != 0;
if (state.cull.enabled) {
state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
@ -943,16 +1025,21 @@ void RasterizerOpenGL::SyncDepthTestState() {
state.depth.test_enabled = regs.depth_test_enable != 0;
state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
if (!state.depth.test_enabled)
if (!state.depth.test_enabled) {
return;
}
state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
}
void RasterizerOpenGL::SyncStencilTestState() {
const auto& regs = system.GPU().Maxwell3D().regs;
state.stencil.test_enabled = regs.stencil_enable != 0;
auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.stencil_test) {
return;
}
const auto& regs = maxwell3d.regs;
state.stencil.test_enabled = regs.stencil_enable != 0;
if (!regs.stencil_enable) {
return;
}
@ -981,10 +1068,17 @@ void RasterizerOpenGL::SyncStencilTestState() {
state.stencil.back.action_depth_fail = GL_KEEP;
state.stencil.back.action_depth_pass = GL_KEEP;
}
state.MarkDirtyStencilState();
maxwell3d.dirty.stencil_test = false;
}
void RasterizerOpenGL::SyncColorMask() {
const auto& regs = system.GPU().Maxwell3D().regs;
auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.color_mask) {
return;
}
const auto& regs = maxwell3d.regs;
const std::size_t count =
regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
for (std::size_t i = 0; i < count; i++) {
@ -995,6 +1089,9 @@ void RasterizerOpenGL::SyncColorMask() {
dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
}
state.MarkDirtyColorMask();
maxwell3d.dirty.color_mask = false;
}
void RasterizerOpenGL::SyncMultiSampleState() {
@ -1009,7 +1106,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() {
}
void RasterizerOpenGL::SyncBlendState() {
const auto& regs = system.GPU().Maxwell3D().regs;
auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.blend_state) {
return;
}
const auto& regs = maxwell3d.regs;
state.blend_color.red = regs.blend_color.r;
state.blend_color.green = regs.blend_color.g;
@ -1032,6 +1133,8 @@ void RasterizerOpenGL::SyncBlendState() {
for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
state.blend[i].enabled = false;
}
maxwell3d.dirty.blend_state = false;
state.MarkDirtyBlendState();
return;
}
@ -1048,6 +1151,9 @@ void RasterizerOpenGL::SyncBlendState() {
blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
}
state.MarkDirtyBlendState();
maxwell3d.dirty.blend_state = false;
}
void RasterizerOpenGL::SyncLogicOpState() {
@ -1099,13 +1205,21 @@ void RasterizerOpenGL::SyncPointState() {
}
/// Copies the Maxwell3D polygon offset registers into `state.polygon_offset`.
/// Returns early when `dirty.polygon_offset` is not set; otherwise marks the OpenGL state's
/// polygon offset dirty and clears the Maxwell3D flag.
void RasterizerOpenGL::SyncPolygonOffset() {
// NOTE(review): `regs` is declared a second time further down from `maxwell3d.regs`; this line
// looks like the pre-change side of the diff shown next to its replacement — confirm which is live.
const auto& regs = system.GPU().Maxwell3D().regs;
auto& maxwell3d = system.GPU().Maxwell3D();
// Nothing to synchronize unless the polygon offset flag is raised.
if (!maxwell3d.dirty.polygon_offset) {
return;
}
const auto& regs = maxwell3d.regs;
// The enable registers are integers; any non-zero value enables the corresponding mode.
state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
state.polygon_offset.units = regs.polygon_offset_units;
state.polygon_offset.factor = regs.polygon_offset_factor;
state.polygon_offset.clamp = regs.polygon_offset_clamp;
// Flag the OpenGL state as changed, then clear the Maxwell3D flag that triggered this sync.
state.MarkDirtyPolygonOffset();
maxwell3d.dirty.polygon_offset = false;
}
void RasterizerOpenGL::SyncAlphaTest() {

View File

@ -108,6 +108,9 @@ private:
OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true,
bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
bool using_depth_fb, bool using_stencil_fb);
/// Configures the current constbuffers to use for the draw command.
void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader);
@ -216,6 +219,7 @@ private:
GLuint SetupVertexFormat();
void SetupVertexBuffer(GLuint vao);
void SetupVertexInstances(GLuint vao);
GLintptr SetupIndexBuffer();
@ -226,6 +230,8 @@ private:
enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw = AccelDraw::Disabled;
OGLFramebuffer clear_framebuffer;
using CachedPageMap = boost::icl::interval_map<u64, int>;
CachedPageMap cached_pages;
};

View File

@ -572,7 +572,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
}
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
if (!system.GPU().Maxwell3D().dirty_flags.shaders) {
if (!system.GPU().Maxwell3D().dirty.shaders) {
return last_shaders[static_cast<std::size_t>(program)];
}

View File

@ -165,6 +165,25 @@ OpenGLState::OpenGLState() {
alpha_test.ref = 0.0f;
}
void OpenGLState::SetDefaultViewports() {
for (auto& item : viewports) {
item.x = 0;
item.y = 0;
item.width = 0;
item.height = 0;
item.depth_range_near = 0.0f;
item.depth_range_far = 1.0f;
item.scissor.enabled = false;
item.scissor.x = 0;
item.scissor.y = 0;
item.scissor.width = 0;
item.scissor.height = 0;
}
depth_clamp.far_plane = false;
depth_clamp.near_plane = false;
}
void OpenGLState::ApplyDefaultState() {
glEnable(GL_BLEND);
glDisable(GL_FRAMEBUFFER_SRGB);
@ -526,7 +545,7 @@ void OpenGLState::ApplySamplers() const {
}
}
void OpenGLState::Apply() const {
void OpenGLState::Apply() {
MICROPROFILE_SCOPE(OpenGL_State);
ApplyFramebufferState();
ApplyVertexArrayState();
@ -536,19 +555,31 @@ void OpenGLState::Apply() const {
ApplyPointSize();
ApplyFragmentColorClamp();
ApplyMultisample();
if (dirty.color_mask) {
ApplyColorMask();
dirty.color_mask = false;
}
ApplyDepthClamp();
ApplyColorMask();
ApplyViewport();
ApplyStencilTest();
if (dirty.stencil_state) {
ApplyStencilTest();
dirty.stencil_state = false;
}
ApplySRgb();
ApplyCulling();
ApplyDepth();
ApplyPrimitiveRestart();
ApplyBlending();
if (dirty.blend_state) {
ApplyBlending();
dirty.blend_state = false;
}
ApplyLogicOp();
ApplyTextures();
ApplySamplers();
ApplyPolygonOffset();
if (dirty.polygon_offset) {
ApplyPolygonOffset();
dirty.polygon_offset = false;
}
ApplyAlphaTest();
}

View File

@ -195,8 +195,9 @@ public:
s_rgb_used = false;
}
void SetDefaultViewports();
/// Apply this state as the current OpenGL state
void Apply() const;
void Apply();
void ApplyFramebufferState() const;
void ApplyVertexArrayState() const;
@ -237,11 +238,41 @@ public:
/// Viewport does not affect glClearBuffer so emulate viewport using scissor test
void EmulateViewportWithScissor();
/// Raises `dirty.blend_state`; Apply() only calls ApplyBlending() while this flag is set.
void MarkDirtyBlendState() {
dirty.blend_state = true;
}
/// Raises `dirty.stencil_state`; Apply() only calls ApplyStencilTest() while this flag is set.
void MarkDirtyStencilState() {
dirty.stencil_state = true;
}
/// Raises `dirty.polygon_offset`; Apply() only calls ApplyPolygonOffset() while this flag is set.
void MarkDirtyPolygonOffset() {
dirty.polygon_offset = true;
}
/// Raises `dirty.color_mask`; Apply() only calls ApplyColorMask() while this flag is set.
void MarkDirtyColorMask() {
dirty.color_mask = true;
}
/// Raises every flag in `dirty` so the next Apply() re-submits all of the flag-gated state.
/// Callers use this before applying a state object whose flags may not reflect what is
/// currently bound (e.g. a freshly constructed or previously saved state).
void AllDirty() {
    dirty.blend_state = true;
    dirty.stencil_state = true;
    // Previously left out even though the flag is declared alongside the others.
    dirty.viewport_state = true;
    dirty.polygon_offset = true;
    dirty.color_mask = true;
}
private:
static OpenGLState cur_state;
// Workaround for sRGB problems caused by QT not supporting srgb output
static bool s_rgb_used;
// Non-static flags gating the conditional sections of Apply(); value-initialized, so every
// flag starts out false.
struct {
bool blend_state; // Apply() calls ApplyBlending() when set
bool stencil_state; // Apply() calls ApplyStencilTest() when set
bool viewport_state; // NOTE(review): not read or written by any code visible here — confirm use
bool polygon_offset; // Apply() calls ApplyPolygonOffset() when set
bool color_mask; // Apply() calls ApplyColorMask() when set
} dirty{};
};
} // namespace OpenGL

View File

@ -485,11 +485,15 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
const auto& dst_params{dst_view->GetSurfaceParams()};
OpenGLState prev_state{OpenGLState::GetCurState()};
SCOPE_EXIT({ prev_state.Apply(); });
SCOPE_EXIT({
prev_state.AllDirty();
prev_state.Apply();
});
OpenGLState state;
state.draw.read_framebuffer = src_framebuffer.handle;
state.draw.draw_framebuffer = dst_framebuffer.handle;
state.AllDirty();
state.Apply();
u32 buffers{};

View File

@ -108,6 +108,7 @@ void RendererOpenGL::SwapBuffers(
// Maintain the rasterizer's state as a priority
OpenGLState prev_state = OpenGLState::GetCurState();
state.AllDirty();
state.Apply();
if (framebuffer) {
@ -140,6 +141,7 @@ void RendererOpenGL::SwapBuffers(
system.GetPerfStats().BeginSystemFrame();
// Restore the rasterizer state
prev_state.AllDirty();
prev_state.Apply();
}
@ -206,6 +208,7 @@ void RendererOpenGL::InitOpenGLObjects() {
// Link shaders and get variable locations
shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);
state.draw.shader_program = shader.handle;
state.AllDirty();
state.Apply();
uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
@ -338,12 +341,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
// Workaround brightness problems in SMO by enabling sRGB in the final output
// if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
state.AllDirty();
state.Apply();
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Restore default state
state.framebuffer_srgb.enabled = false;
state.texture_units[0].texture = 0;
state.AllDirty();
state.Apply();
// Clear sRGB state for the next frame
OpenGLState::ClearsRGBUsed();
@ -388,6 +393,7 @@ void RendererOpenGL::CaptureScreenshot() {
GLuint old_read_fb = state.draw.read_framebuffer;
GLuint old_draw_fb = state.draw.draw_framebuffer;
state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle;
state.AllDirty();
state.Apply();
Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
@ -407,6 +413,7 @@ void RendererOpenGL::CaptureScreenshot() {
screenshot_framebuffer.Release();
state.draw.read_framebuffer = old_read_fb;
state.draw.draw_framebuffer = old_draw_fb;
state.AllDirty();
state.Apply();
glDeleteRenderbuffers(1, &renderbuffer);