nvnflinger/gpu: implement blending

This commit is contained in:
Liam
2024-01-22 12:40:50 -05:00
parent 06fd7f2012
commit 962c82540c
37 changed files with 383 additions and 141 deletions

View File

@ -11,6 +11,12 @@
namespace Tegra {
enum class BlendMode {
Opaque,
Premultiplied,
Coverage,
};
/**
* Struct describing framebuffer configuration
*/
@ -23,6 +29,7 @@ struct FramebufferConfig {
Service::android::PixelFormat pixel_format{};
Service::android::BufferTransformFlags transform_flags{};
Common::Rectangle<int> crop_rect{};
BlendMode blending{};
};
Common::Rectangle<f32> NormalizeCrop(const FramebufferConfig& framebuffer, u32 texture_width,

View File

@ -37,6 +37,7 @@ layout(set=0,binding=0) uniform sampler2D InputTexture;
#define A_GPU 1
#define A_GLSL 1
#define FSR_RCAS_PASSTHROUGH_ALPHA 1
#ifndef YUZU_USE_FP16
#include "ffx_a.h"
@ -71,9 +72,7 @@ layout(set=0,binding=0) uniform sampler2D InputTexture;
#include "ffx_fsr1.h"
#if USE_RCAS
layout(location = 0) in vec2 frag_texcoord;
#endif
layout (location = 0) in vec2 frag_texcoord;
layout (location = 0) out vec4 frag_color;
void CurrFilter(AU2 pos) {
@ -81,22 +80,22 @@ void CurrFilter(AU2 pos) {
#ifndef YUZU_USE_FP16
AF3 c;
FsrEasuF(c, pos, Const0, Const1, Const2, Const3);
frag_color = AF4(c, 1.0);
frag_color = AF4(c, texture(InputTexture, frag_texcoord).a);
#else
AH3 c;
FsrEasuH(c, pos, Const0, Const1, Const2, Const3);
frag_color = AH4(c, 1.0);
frag_color = AH4(c, texture(InputTexture, frag_texcoord).a);
#endif
#endif
#if USE_RCAS
#ifndef YUZU_USE_FP16
AF3 c;
FsrRcasF(c.r, c.g, c.b, pos, Const0);
frag_color = AF4(c, 1.0);
AF4 c;
FsrRcasF(c.r, c.g, c.b, c.a, pos, Const0);
frag_color = c;
#else
AH3 c;
FsrRcasH(c.r, c.g, c.b, pos, Const0);
frag_color = AH4(c, 1.0);
AH4 c;
FsrRcasH(c.r, c.g, c.b, c.a, pos, Const0);
frag_color = c;
#endif
#endif
}

View File

@ -71,5 +71,5 @@ vec3 FxaaPixelShader(vec4 posPos, sampler2D tex) {
}
void main() {
frag_color = vec4(FxaaPixelShader(posPos, input_texture), 1.0);
frag_color = vec4(FxaaPixelShader(posPos, input_texture), texture(input_texture, posPos.xy).a);
}

View File

@ -31,6 +31,7 @@ layout (location = 0) uniform uvec4 constants[4];
#define A_GPU 1
#define A_GLSL 1
#define FSR_RCAS_PASSTHROUGH_ALPHA 1
#ifdef YUZU_USE_FP16
#define A_HALF
@ -67,9 +68,7 @@ layout (location = 0) uniform uvec4 constants[4];
#include "ffx_fsr1.h"
#if USE_RCAS
layout(location = 0) in vec2 frag_texcoord;
#endif
layout (location = 0) in vec2 frag_texcoord;
layout (location = 0) out vec4 frag_color;
void CurrFilter(AU2 pos)
@ -78,22 +77,22 @@ void CurrFilter(AU2 pos)
#ifndef YUZU_USE_FP16
AF3 c;
FsrEasuF(c, pos, constants[0], constants[1], constants[2], constants[3]);
frag_color = AF4(c, 1.0);
frag_color = AF4(c, texture(InputTexture, frag_texcoord).a);
#else
AH3 c;
FsrEasuH(c, pos, constants[0], constants[1], constants[2], constants[3]);
frag_color = AH4(c, 1.0);
frag_color = AH4(c, texture(InputTexture, frag_texcoord).a);
#endif
#endif
#if USE_RCAS
#ifndef YUZU_USE_FP16
AF3 c;
FsrRcasF(c.r, c.g, c.b, pos, constants[0]);
frag_color = AF4(c, 1.0);
AF4 c;
FsrRcasF(c.r, c.g, c.b, c.a, pos, constants[0]);
frag_color = c;
#else
AH3 c;
FsrRcasH(c.r, c.g, c.b, pos, constants[0]);
frag_color = AH4(c, 1.0);
FsrRcasH(c.r, c.g, c.b, c.a, pos, constants[0]);
frag_color = c;
#endif
#endif
}

View File

@ -9,5 +9,5 @@ layout (location = 0) out vec4 color;
layout (binding = 0) uniform sampler2D color_texture;
void main() {
color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
color = vec4(texture(color_texture, frag_tex_coord));
}

View File

@ -52,5 +52,5 @@ vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) {
}
void main() {
color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f);
color = textureBicubic(color_texture, frag_tex_coord);
}

View File

@ -46,14 +46,14 @@ vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) {
}
void main() {
vec3 base = texture(color_texture, vec2(frag_tex_coord)).rgb * weight[0];
vec4 base = texture(color_texture, vec2(frag_tex_coord)) * weight[0];
vec2 tex_offset = 1.0f / textureSize(color_texture, 0);
// TODO(Blinkhawk): This code can be optimized through shader group instructions.
vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset).rgb;
vec3 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb;
vec3 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset).rgb;
vec3 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb;
vec3 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f);
color = vec4(combination + base, 1.0f);
vec4 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset);
vec4 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset);
vec4 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset);
vec4 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0));
vec4 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f);
color = combination + base;
}

View File

@ -6,5 +6,6 @@
#define YUZU_USE_FP16
#define USE_EASU 1
#define VERSION 1
#include "fidelityfx_fsr.frag"

View File

@ -5,5 +5,6 @@
#extension GL_GOOGLE_include_directive : enable
#define USE_EASU 1
#define VERSION 1
#include "fidelityfx_fsr.frag"

View File

@ -6,5 +6,6 @@
#define YUZU_USE_FP16
#define USE_RCAS 1
#define VERSION 1
#include "fidelityfx_fsr.frag"

View File

@ -5,5 +5,6 @@
#extension GL_GOOGLE_include_directive : enable
#define USE_RCAS 1
#define VERSION 1
#include "fidelityfx_fsr.frag"

View File

@ -5,7 +5,7 @@
#extension GL_GOOGLE_include_directive : enable
#define VERSION 1
#define VERSION 2
#define YUZU_USE_FP16
#include "opengl_present_scaleforce.frag"

View File

@ -5,6 +5,6 @@
#extension GL_GOOGLE_include_directive : enable
#define VERSION 1
#define VERSION 2
#include "opengl_present_scaleforce.frag"

View File

@ -92,6 +92,21 @@ void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, std::li
glClear(GL_COLOR_BUFFER_BIT);
for (size_t i = 0; i < layer_count; i++) {
switch (framebuffers[i].blending) {
case Tegra::BlendMode::Opaque:
default:
glDisablei(GL_BLEND, 0);
break;
case Tegra::BlendMode::Premultiplied:
glEnablei(GL_BLEND, 0);
glBlendFuncSeparatei(0, GL_ONE, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ZERO);
break;
case Tegra::BlendMode::Coverage:
glEnablei(GL_BLEND, 0);
glBlendFuncSeparatei(0, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ZERO);
break;
}
glBindTextureUnit(0, textures[i]);
glProgramUniformMatrix3x2fv(vert.handle, ModelViewMatrixLocation, 1, GL_FALSE,
matrices[i].data());

View File

@ -362,10 +362,10 @@ vk::PipelineLayout CreateWrappedPipelineLayout(const Device& device,
});
}
vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass,
vk::PipelineLayout& layout,
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders,
bool enable_blending) {
static vk::Pipeline CreateWrappedPipelineImpl(
const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout,
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders,
VkPipelineColorBlendAttachmentState blending) {
const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
@ -443,30 +443,6 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp
.alphaToOneEnable = VK_FALSE,
};
constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_disabled{
.blendEnable = VK_FALSE,
.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.colorBlendOp = VK_BLEND_OP_ADD,
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.alphaBlendOp = VK_BLEND_OP_ADD,
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
};
constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_enabled{
.blendEnable = VK_TRUE,
.srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA,
.dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
.colorBlendOp = VK_BLEND_OP_ADD,
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.alphaBlendOp = VK_BLEND_OP_ADD,
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
};
const VkPipelineColorBlendStateCreateInfo color_blend_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.pNext = nullptr,
@ -474,8 +450,7 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_COPY,
.attachmentCount = 1,
.pAttachments =
enable_blending ? &color_blend_attachment_enabled : &color_blend_attachment_disabled,
.pAttachments = &blending,
.blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
};
@ -515,6 +490,63 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp
});
}
vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass,
vk::PipelineLayout& layout,
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders) {
constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_disabled{
.blendEnable = VK_FALSE,
.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.colorBlendOp = VK_BLEND_OP_ADD,
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.alphaBlendOp = VK_BLEND_OP_ADD,
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
};
return CreateWrappedPipelineImpl(device, renderpass, layout, shaders,
color_blend_attachment_disabled);
}
vk::Pipeline CreateWrappedPremultipliedBlendingPipeline(
const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout,
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders) {
constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_premultiplied{
.blendEnable = VK_TRUE,
.srcColorBlendFactor = VK_BLEND_FACTOR_ONE,
.dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
.colorBlendOp = VK_BLEND_OP_ADD,
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.alphaBlendOp = VK_BLEND_OP_ADD,
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
};
return CreateWrappedPipelineImpl(device, renderpass, layout, shaders,
color_blend_attachment_premultiplied);
}
vk::Pipeline CreateWrappedCoverageBlendingPipeline(
const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout,
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders) {
constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_coverage{
.blendEnable = VK_TRUE,
.srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA,
.dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
.colorBlendOp = VK_BLEND_OP_ADD,
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.alphaBlendOp = VK_BLEND_OP_ADD,
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
};
return CreateWrappedPipelineImpl(device, renderpass, layout, shaders,
color_blend_attachment_coverage);
}
VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector<VkDescriptorImageInfo>& images,
VkSampler sampler, VkImageView view,
VkDescriptorSet set, u32 binding) {

View File

@ -42,8 +42,13 @@ vk::PipelineLayout CreateWrappedPipelineLayout(const Device& device,
vk::DescriptorSetLayout& layout);
vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass,
vk::PipelineLayout& layout,
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders,
bool enable_blending = false);
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders);
vk::Pipeline CreateWrappedPremultipliedBlendingPipeline(
const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout,
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders);
vk::Pipeline CreateWrappedCoverageBlendingPipeline(
const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout,
std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders);
VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector<VkDescriptorImageInfo>& images,
VkSampler sampler, VkImageView view,
VkDescriptorSet set, u32 binding);

View File

@ -22,7 +22,7 @@ WindowAdaptPass::WindowAdaptPass(const Device& device_, VkFormat frame_format,
CreatePipelineLayout();
CreateVertexShader();
CreateRenderPass(frame_format);
CreatePipeline();
CreatePipelines();
}
WindowAdaptPass::~WindowAdaptPass() = default;
@ -34,7 +34,6 @@ void WindowAdaptPass::Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, s
const VkFramebuffer host_framebuffer{*dst->framebuffer};
const VkRenderPass renderpass{*render_pass};
const VkPipeline graphics_pipeline{*pipeline};
const VkPipelineLayout graphics_pipeline_layout{*pipeline_layout};
const VkExtent2D render_area{
.width = dst->width,
@ -44,9 +43,23 @@ void WindowAdaptPass::Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, s
const size_t layer_count = configs.size();
std::vector<PresentPushConstants> push_constants(layer_count);
std::vector<VkDescriptorSet> descriptor_sets(layer_count);
std::vector<VkPipeline> graphics_pipelines(layer_count);
auto layer_it = layers.begin();
for (size_t i = 0; i < layer_count; i++) {
switch (configs[i].blending) {
case Tegra::BlendMode::Opaque:
default:
graphics_pipelines[i] = *opaque_pipeline;
break;
case Tegra::BlendMode::Premultiplied:
graphics_pipelines[i] = *premultiplied_pipeline;
break;
case Tegra::BlendMode::Coverage:
graphics_pipelines[i] = *coverage_pipeline;
break;
}
layer_it->ConfigureDraw(&push_constants[i], &descriptor_sets[i], rasterizer, *sampler,
image_index, configs[i], layout);
layer_it++;
@ -77,8 +90,8 @@ void WindowAdaptPass::Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, s
BeginRenderPass(cmdbuf, renderpass, host_framebuffer, render_area);
cmdbuf.ClearAttachments({clear_attachment}, {clear_rect});
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline);
for (size_t i = 0; i < layer_count; i++) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipelines[i]);
cmdbuf.PushConstants(graphics_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT,
push_constants[i]);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline_layout, 0,
@ -129,9 +142,13 @@ void WindowAdaptPass::CreateRenderPass(VkFormat frame_format) {
render_pass = CreateWrappedRenderPass(device, frame_format, VK_IMAGE_LAYOUT_UNDEFINED);
}
void WindowAdaptPass::CreatePipeline() {
pipeline = CreateWrappedPipeline(device, render_pass, pipeline_layout,
std::tie(vertex_shader, fragment_shader), false);
void WindowAdaptPass::CreatePipelines() {
opaque_pipeline = CreateWrappedPipeline(device, render_pass, pipeline_layout,
std::tie(vertex_shader, fragment_shader));
premultiplied_pipeline = CreateWrappedPremultipliedBlendingPipeline(
device, render_pass, pipeline_layout, std::tie(vertex_shader, fragment_shader));
coverage_pipeline = CreateWrappedCoverageBlendingPipeline(
device, render_pass, pipeline_layout, std::tie(vertex_shader, fragment_shader));
}
} // namespace Vulkan

View File

@ -42,7 +42,7 @@ private:
void CreatePipelineLayout();
void CreateVertexShader();
void CreateRenderPass(VkFormat frame_format);
void CreatePipeline();
void CreatePipelines();
private:
const Device& device;
@ -52,7 +52,9 @@ private:
vk::ShaderModule vertex_shader;
vk::ShaderModule fragment_shader;
vk::RenderPass render_pass;
vk::Pipeline pipeline;
vk::Pipeline opaque_pipeline;
vk::Pipeline premultiplied_pipeline;
vk::Pipeline coverage_pipeline;
};
} // namespace Vulkan

View File

@ -101,8 +101,10 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
surface),
blit_swapchain(device_memory, device, memory_allocator, present_manager, scheduler),
blit_screenshot(device_memory, device, memory_allocator, present_manager, scheduler),
blit_application_layer(device_memory, device, memory_allocator, present_manager, scheduler),
rasterizer(render_window, gpu, device_memory, device, memory_allocator, state_tracker,
scheduler) {
scheduler),
application_frame() {
if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
turbo_mode.emplace(instance, dld);
scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); });

View File

@ -80,8 +80,11 @@ private:
PresentManager present_manager;
BlitScreen blit_swapchain;
BlitScreen blit_screenshot;
BlitScreen blit_application_layer;
RasterizerVulkan rasterizer;
std::optional<TurboMode> turbo_mode;
Frame application_frame;
};
} // namespace Vulkan

View File

@ -746,7 +746,13 @@ std::pair<typename P::ImageView*, bool> TextureCache<P>::TryFindFramebufferImage
}();
const auto GetImageViewForFramebuffer = [&](ImageId image_id) {
const ImageViewInfo info{ImageViewType::e2D, view_format};
ImageViewInfo info{ImageViewType::e2D, view_format};
if (config.blending == Tegra::BlendMode::Opaque) {
info.x_source = static_cast<u8>(SwizzleSource::R);
info.y_source = static_cast<u8>(SwizzleSource::G);
info.z_source = static_cast<u8>(SwizzleSource::B);
info.w_source = static_cast<u8>(SwizzleSource::OneFloat);
}
return std::make_pair(&slot_image_views[FindOrEmplaceImageView(image_id, info)],
slot_images[image_id].IsRescaled());
};