mirror of
https://github.com/yuzu-emu/yuzu.git
synced 2025-07-06 15:47:52 -05:00
Merge branch 'master' into ssbo-align
This commit is contained in:
@ -132,16 +132,12 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
||||
const bool use_accelerated =
|
||||
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
|
||||
const bool is_srgb = use_accelerated && screen_info.is_srgb;
|
||||
RenderScreenshot(*framebuffer, use_accelerated);
|
||||
|
||||
{
|
||||
std::scoped_lock lock{rasterizer.LockCaches()};
|
||||
RenderScreenshot(*framebuffer, use_accelerated);
|
||||
|
||||
Frame* frame = present_manager.GetRenderFrame();
|
||||
blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
|
||||
scheduler.Flush(*frame->render_ready);
|
||||
present_manager.Present(frame);
|
||||
}
|
||||
Frame* frame = present_manager.GetRenderFrame();
|
||||
blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
|
||||
scheduler.Flush(*frame->render_ready);
|
||||
present_manager.Present(frame);
|
||||
|
||||
gpu.RendererFrameEndNotify();
|
||||
rasterizer.TickFrame();
|
||||
|
@ -137,6 +137,56 @@ BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWin
|
||||
|
||||
BlitScreen::~BlitScreen() = default;
|
||||
|
||||
/// Builds the crop rectangle of a guest framebuffer, expressed relative to the screen_info size.
///
/// The rectangle starts as the framebuffer's crop_rect, or as (0, 0, width, height) of the
/// framebuffer when crop_rect is empty. The FlipH / FlipV transform flags exchange the
/// corresponding pair of edges. Any other transform flag is reported as unimplemented. Finally
/// the horizontal edges are divided by screen_info.width and the vertical edges by
/// screen_info.height.
///
/// @param framebuffer Supplies crop_rect, width, height and transform_flags.
/// @param screen_info Supplies the width and height used as divisors.
/// @return The resulting rectangle. After a flip, left is the larger horizontal edge (or top the
///         larger vertical edge), so the rectangle is not necessarily ordered.
static Common::Rectangle<f32> NormalizeCrop(const Tegra::FramebufferConfig& framebuffer,
                                            const ScreenInfo& screen_info) {
    using TransformFlags = Service::android::BufferTransformFlags;

    // Pick the source rectangle: the explicit crop when one is set, otherwise the whole
    // framebuffer.
    const auto& requested_crop = framebuffer.crop_rect;
    const bool has_crop = !requested_crop.IsEmpty();
    f32 left = has_crop ? static_cast<f32>(requested_crop.left) : 0.0f;
    f32 top = has_crop ? static_cast<f32>(requested_crop.top) : 0.0f;
    f32 right = has_crop ? static_cast<f32>(requested_crop.right)
                         : static_cast<f32>(framebuffer.width);
    f32 bottom = has_crop ? static_cast<f32>(requested_crop.bottom)
                          : static_cast<f32>(framebuffer.height);

    // Mirror the edges for the two supported transform flags.
    auto pending_flags = framebuffer.transform_flags;
    const bool flip_horizontal = True(pending_flags & TransformFlags::FlipH);
    const bool flip_vertical = True(pending_flags & TransformFlags::FlipV);
    if (flip_horizontal) {
        std::swap(left, right);
    }
    if (flip_vertical) {
        std::swap(top, bottom);
    }

    // Whatever is still set after removing the handled flags is not supported here.
    pending_flags &= ~TransformFlags::FlipH;
    pending_flags &= ~TransformFlags::FlipV;
    if (True(pending_flags)) {
        UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}",
                          static_cast<u32>(pending_flags));
    }

    // Express every edge as a fraction of the screen_info dimensions.
    const f32 divisor_x = static_cast<f32>(screen_info.width);
    const f32 divisor_y = static_cast<f32>(screen_info.height);
    return Common::Rectangle<f32>(left / divisor_x, top / divisor_y, right / divisor_x,
                                  bottom / divisor_y);
}
|
||||
|
||||
void BlitScreen::Recreate() {
|
||||
present_manager.WaitPresent();
|
||||
scheduler.Finish();
|
||||
@ -354,17 +404,10 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
|
||||
source_image_view = smaa->Draw(scheduler, image_index, source_image, source_image_view);
|
||||
}
|
||||
if (fsr) {
|
||||
auto crop_rect = framebuffer.crop_rect;
|
||||
if (crop_rect.GetWidth() == 0) {
|
||||
crop_rect.right = framebuffer.width;
|
||||
}
|
||||
if (crop_rect.GetHeight() == 0) {
|
||||
crop_rect.bottom = framebuffer.height;
|
||||
}
|
||||
crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor);
|
||||
VkExtent2D fsr_input_size{
|
||||
.width = Settings::values.resolution_info.ScaleUp(framebuffer.width),
|
||||
.height = Settings::values.resolution_info.ScaleUp(framebuffer.height),
|
||||
const auto crop_rect = NormalizeCrop(framebuffer, screen_info);
|
||||
const VkExtent2D fsr_input_size{
|
||||
.width = Settings::values.resolution_info.ScaleUp(screen_info.width),
|
||||
.height = Settings::values.resolution_info.ScaleUp(screen_info.height),
|
||||
};
|
||||
VkImageView fsr_image_view =
|
||||
fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
|
||||
@ -1397,61 +1440,37 @@ void BlitScreen::SetUniformData(BufferData& data, const Layout::FramebufferLayou
|
||||
|
||||
void BlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
|
||||
const Layout::FramebufferLayout layout) const {
|
||||
const auto& framebuffer_transform_flags = framebuffer.transform_flags;
|
||||
const auto& framebuffer_crop_rect = framebuffer.crop_rect;
|
||||
f32 left, top, right, bottom;
|
||||
|
||||
static constexpr Common::Rectangle<f32> texcoords{0.f, 0.f, 1.f, 1.f};
|
||||
auto left = texcoords.left;
|
||||
auto right = texcoords.right;
|
||||
if (fsr) {
|
||||
// FSR has already applied the crop, so we just want to render the image
|
||||
// it has produced.
|
||||
left = 0;
|
||||
top = 0;
|
||||
right = 1;
|
||||
bottom = 1;
|
||||
} else {
|
||||
// Get the normalized crop rectangle.
|
||||
const auto crop = NormalizeCrop(framebuffer, screen_info);
|
||||
|
||||
switch (framebuffer_transform_flags) {
|
||||
case Service::android::BufferTransformFlags::Unset:
|
||||
break;
|
||||
case Service::android::BufferTransformFlags::FlipV:
|
||||
// Flip the framebuffer vertically
|
||||
left = texcoords.right;
|
||||
right = texcoords.left;
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}",
|
||||
static_cast<u32>(framebuffer_transform_flags));
|
||||
break;
|
||||
}
|
||||
|
||||
UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0);
|
||||
|
||||
f32 left_start{};
|
||||
if (framebuffer_crop_rect.Top() > 0) {
|
||||
left_start = static_cast<f32>(framebuffer_crop_rect.Top()) /
|
||||
static_cast<f32>(framebuffer_crop_rect.Bottom());
|
||||
}
|
||||
f32 scale_u = static_cast<f32>(framebuffer.width) / static_cast<f32>(screen_info.width);
|
||||
f32 scale_v = static_cast<f32>(framebuffer.height) / static_cast<f32>(screen_info.height);
|
||||
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
|
||||
// (e.g. handheld mode) on a 1920x1080 framebuffer.
|
||||
if (!fsr) {
|
||||
if (framebuffer_crop_rect.GetWidth() > 0) {
|
||||
scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
|
||||
static_cast<f32>(screen_info.width);
|
||||
}
|
||||
if (framebuffer_crop_rect.GetHeight() > 0) {
|
||||
scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
|
||||
static_cast<f32>(screen_info.height);
|
||||
}
|
||||
// Apply the crop.
|
||||
left = crop.left;
|
||||
top = crop.top;
|
||||
right = crop.right;
|
||||
bottom = crop.bottom;
|
||||
}
|
||||
|
||||
// Map the coordinates to the screen.
|
||||
const auto& screen = layout.screen;
|
||||
const auto x = static_cast<f32>(screen.left);
|
||||
const auto y = static_cast<f32>(screen.top);
|
||||
const auto w = static_cast<f32>(screen.GetWidth());
|
||||
const auto h = static_cast<f32>(screen.GetHeight());
|
||||
data.vertices[0] = ScreenRectVertex(x, y, texcoords.top * scale_u, left_start + left * scale_v);
|
||||
data.vertices[1] =
|
||||
ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left_start + left * scale_v);
|
||||
data.vertices[2] =
|
||||
ScreenRectVertex(x, y + h, texcoords.top * scale_u, left_start + right * scale_v);
|
||||
data.vertices[3] =
|
||||
ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, left_start + right * scale_v);
|
||||
|
||||
data.vertices[0] = ScreenRectVertex(x, y, left, top);
|
||||
data.vertices[1] = ScreenRectVertex(x + w, y, right, top);
|
||||
data.vertices[2] = ScreenRectVertex(x, y + h, left, bottom);
|
||||
data.vertices[3] = ScreenRectVertex(x + w, y + h, right, bottom);
|
||||
}
|
||||
|
||||
void BlitScreen::CreateSMAA(VkExtent2D smaa_size) {
|
||||
|
@ -79,13 +79,13 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
|
||||
} // Anonymous namespace
|
||||
|
||||
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} {}
|
||||
|
||||
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
|
||||
VAddr cpu_addr_, u64 size_bytes_)
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
|
||||
device{&runtime.device}, buffer{
|
||||
CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} {
|
||||
device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())},
|
||||
tracker{SizeBytes()} {
|
||||
if (runtime.device.HasDebuggingToolAttached()) {
|
||||
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
|
||||
}
|
||||
@ -359,12 +359,31 @@ u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
|
||||
return static_cast<u32>(device.GetStorageBufferAlignment());
|
||||
}
|
||||
|
||||
/// Calls ResetUsageTracking() on every buffer reachable by iterating slot_buffers.
///
/// @param slot_buffers Container of buffers whose usage tracking is cleared.
void BufferCacheRuntime::TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept {
    auto cursor = slot_buffers.begin();
    while (cursor != slot_buffers.end()) {
        cursor->ResetUsageTracking();
        cursor++;
    }
}
|
||||
|
||||
/// Forwards to Scheduler::Finish() on the runtime's scheduler.
void BufferCacheRuntime::Finish() {
    scheduler.Finish();
}
|
||||
|
||||
bool BufferCacheRuntime::CanReorderUpload(const Buffer& buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies) {
|
||||
if (Settings::values.disable_buffer_reorder) {
|
||||
return false;
|
||||
}
|
||||
const bool can_use_upload_cmdbuf =
|
||||
std::ranges::all_of(copies, [&](const VideoCommon::BufferCopy& copy) {
|
||||
return !buffer.IsRegionUsed(copy.dst_offset, copy.size);
|
||||
});
|
||||
return can_use_upload_cmdbuf;
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier,
|
||||
bool can_reorder_upload) {
|
||||
if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) {
|
||||
return;
|
||||
}
|
||||
@ -380,9 +399,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
|
||||
// Measuring a popular game, this number never exceeds the specified size once data is warmed up
|
||||
boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
|
||||
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
|
||||
if (src_buffer == staging_pool.StreamBuf() && can_reorder_upload) {
|
||||
scheduler.RecordWithUploadBuffer([src_buffer, dst_buffer, vk_copies](
|
||||
vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
|
||||
upload_cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
|
||||
if (barrier) {
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
#include "video_core/buffer_cache/buffer_cache_base.h"
|
||||
#include "video_core/buffer_cache/memory_tracker_base.h"
|
||||
#include "video_core/buffer_cache/usage_tracker.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
@ -34,6 +35,18 @@ public:
|
||||
return *buffer;
|
||||
}
|
||||
|
||||
/// Asks the buffer's usage tracker whether the given range is marked as used.
///
/// @param offset Start of the queried range (presumably a byte offset into this buffer; confirm
///               against UsageTracker).
/// @param size   Length of the queried range.
/// @return The result of tracker.IsUsed(offset, size).
[[nodiscard]] bool IsRegionUsed(u64 offset, u64 size) const noexcept {
    const bool region_in_use = tracker.IsUsed(offset, size);
    return region_in_use;
}
|
||||
|
||||
/// Records the given range in the buffer's usage tracker.
///
/// @param offset Start of the range passed to tracker.Track (presumably a byte offset into this
///               buffer; confirm against UsageTracker).
/// @param size   Length of the range.
void MarkUsage(u64 offset, u64 size) noexcept {
    tracker.Track(offset, size);
}
|
||||
|
||||
/// Resets the buffer's usage tracker by calling tracker.Reset().
void ResetUsageTracking() noexcept {
    tracker.Reset();
}
|
||||
|
||||
operator VkBuffer() const noexcept {
|
||||
return *buffer;
|
||||
}
|
||||
@ -49,6 +62,7 @@ private:
|
||||
const Device* device{};
|
||||
vk::Buffer buffer;
|
||||
std::vector<BufferView> views;
|
||||
VideoCommon::UsageTracker tracker;
|
||||
};
|
||||
|
||||
class QuadArrayIndexBuffer;
|
||||
@ -67,6 +81,8 @@ public:
|
||||
ComputePassDescriptorQueue& compute_pass_descriptor_queue,
|
||||
DescriptorPool& descriptor_pool);
|
||||
|
||||
void TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept;
|
||||
|
||||
void Finish();
|
||||
|
||||
u64 GetDeviceLocalMemory() const;
|
||||
@ -81,12 +97,15 @@ public:
|
||||
|
||||
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
|
||||
|
||||
bool CanReorderUpload(const Buffer& buffer, std::span<const VideoCommon::BufferCopy> copies);
|
||||
|
||||
void FreeDeferredStagingBuffer(StagingBufferRef& ref);
|
||||
|
||||
void PreCopyBarrier();
|
||||
|
||||
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier,
|
||||
bool can_reorder_upload = false);
|
||||
|
||||
void PostCopyBarrier();
|
||||
|
||||
|
@ -34,7 +34,7 @@ FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image
|
||||
}
|
||||
|
||||
VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view,
|
||||
VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) {
|
||||
VkExtent2D input_image_extent, const Common::Rectangle<f32>& crop_rect) {
|
||||
|
||||
UpdateDescriptorSet(image_index, image_view);
|
||||
|
||||
@ -61,15 +61,21 @@ VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView imag
|
||||
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline);
|
||||
|
||||
std::array<u32, 4 * 4> push_constants;
|
||||
FsrEasuConOffset(
|
||||
push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8,
|
||||
push_constants.data() + 12,
|
||||
const f32 input_image_width = static_cast<f32>(input_image_extent.width);
|
||||
const f32 input_image_height = static_cast<f32>(input_image_extent.height);
|
||||
const f32 output_image_width = static_cast<f32>(output_size.width);
|
||||
const f32 output_image_height = static_cast<f32>(output_size.height);
|
||||
const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_image_width;
|
||||
const f32 viewport_x = crop_rect.left * input_image_width;
|
||||
const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_image_height;
|
||||
const f32 viewport_y = crop_rect.top * input_image_height;
|
||||
|
||||
static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()),
|
||||
static_cast<f32>(input_image_extent.width), static_cast<f32>(input_image_extent.height),
|
||||
static_cast<f32>(output_size.width), static_cast<f32>(output_size.height),
|
||||
static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top));
|
||||
std::array<u32, 4 * 4> push_constants;
|
||||
FsrEasuConOffset(push_constants.data() + 0, push_constants.data() + 4,
|
||||
push_constants.data() + 8, push_constants.data() + 12,
|
||||
|
||||
viewport_width, viewport_height, input_image_width, input_image_height,
|
||||
output_image_width, output_image_height, viewport_x, viewport_y);
|
||||
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
|
||||
|
||||
{
|
||||
|
@ -17,7 +17,7 @@ public:
|
||||
explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
|
||||
VkExtent2D output_size);
|
||||
VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view,
|
||||
VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect);
|
||||
VkExtent2D input_image_extent, const Common::Rectangle<f32>& crop_rect);
|
||||
|
||||
private:
|
||||
void CreateDescriptorPool();
|
||||
|
@ -100,12 +100,14 @@ void MasterSemaphore::Wait(u64 tick) {
|
||||
Refresh();
|
||||
}
|
||||
|
||||
VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick) {
|
||||
VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
|
||||
u64 host_tick) {
|
||||
if (semaphore) {
|
||||
return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
|
||||
return SubmitQueueTimeline(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore,
|
||||
host_tick);
|
||||
} else {
|
||||
return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
|
||||
return SubmitQueueFence(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, host_tick);
|
||||
}
|
||||
}
|
||||
|
||||
@ -115,6 +117,7 @@ static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
|
||||
};
|
||||
|
||||
VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
|
||||
vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick) {
|
||||
const VkSemaphore timeline_semaphore = *semaphore;
|
||||
@ -123,6 +126,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
|
||||
const std::array signal_values{host_tick, u64(0)};
|
||||
const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
|
||||
|
||||
const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
|
||||
|
||||
const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
|
||||
const VkTimelineSemaphoreSubmitInfo timeline_si{
|
||||
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
|
||||
@ -138,8 +143,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
|
||||
.waitSemaphoreCount = num_wait_semaphores,
|
||||
.pWaitSemaphores = &wait_semaphore,
|
||||
.pWaitDstStageMask = wait_stage_masks.data(),
|
||||
.commandBufferCount = 1,
|
||||
.pCommandBuffers = cmdbuf.address(),
|
||||
.commandBufferCount = static_cast<u32>(cmdbuffers.size()),
|
||||
.pCommandBuffers = cmdbuffers.data(),
|
||||
.signalSemaphoreCount = num_signal_semaphores,
|
||||
.pSignalSemaphores = signal_semaphores.data(),
|
||||
};
|
||||
@ -147,19 +152,23 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
|
||||
return device.GetGraphicsQueue().Submit(submit_info);
|
||||
}
|
||||
|
||||
VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick) {
|
||||
VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf,
|
||||
vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
|
||||
u64 host_tick) {
|
||||
const u32 num_signal_semaphores = signal_semaphore ? 1 : 0;
|
||||
const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
|
||||
|
||||
const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
|
||||
|
||||
const VkSubmitInfo submit_info{
|
||||
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
|
||||
.pNext = nullptr,
|
||||
.waitSemaphoreCount = num_wait_semaphores,
|
||||
.pWaitSemaphores = &wait_semaphore,
|
||||
.pWaitDstStageMask = wait_stage_masks.data(),
|
||||
.commandBufferCount = 1,
|
||||
.pCommandBuffers = cmdbuf.address(),
|
||||
.commandBufferCount = static_cast<u32>(cmdbuffers.size()),
|
||||
.pCommandBuffers = cmdbuffers.data(),
|
||||
.signalSemaphoreCount = num_signal_semaphores,
|
||||
.pSignalSemaphores = &signal_semaphore,
|
||||
};
|
||||
|
@ -52,14 +52,16 @@ public:
|
||||
void Wait(u64 tick);
|
||||
|
||||
/// Submits the device graphics queue, updating the tick as necessary
|
||||
VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick);
|
||||
VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, u64 host_tick);
|
||||
|
||||
private:
|
||||
VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick);
|
||||
VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick);
|
||||
VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
|
||||
u64 host_tick);
|
||||
VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
|
||||
u64 host_tick);
|
||||
|
||||
void WaitThread(std::stop_token token);
|
||||
|
||||
|
@ -263,6 +263,22 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
|
||||
info.y_negate = key.state.y_negate != 0;
|
||||
return info;
|
||||
}
|
||||
|
||||
/// Computes how many pipeline builder worker threads to create.
///
/// The reported hardware concurrency is raised to at least two and then reduced by one. On
/// ANDROID builds three more are subtracted, with the result never dropping below one.
///
/// @return Worker thread count, always at least 1.
size_t GetTotalPipelineWorkers() {
    const size_t hardware_threads = static_cast<size_t>(std::thread::hardware_concurrency());
    // hardware_concurrency() may report fewer than two; clamp to two before subtracting one.
    const size_t usable_threads = (hardware_threads < 2 ? size_t{2} : hardware_threads) - 1;
#ifdef ANDROID
    // Leave at least a few cores free in android
    constexpr size_t reserved_cores = 3;
    return usable_threads > reserved_cores ? usable_threads - reserved_cores : size_t{1};
#else
    return usable_threads;
#endif
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
size_t ComputePipelineCacheKey::Hash() const noexcept {
|
||||
@ -294,11 +310,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
|
||||
texture_cache{texture_cache_}, shader_notify{shader_notify_},
|
||||
use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
|
||||
use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()},
|
||||
#ifdef ANDROID
|
||||
workers(1, "VkPipelineBuilder"),
|
||||
#else
|
||||
workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"),
|
||||
#endif
|
||||
workers(device.HasBrokenParallelShaderCompiling() ? 1ULL : GetTotalPipelineWorkers(),
|
||||
"VkPipelineBuilder"),
|
||||
serialization_thread(1, "VkPipelineSerialization") {
|
||||
const auto& float_control{device.FloatControlProperties()};
|
||||
const VkDriverId driver_id{device.GetDriverID()};
|
||||
@ -338,6 +351,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
|
||||
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
|
||||
.support_native_ndc = device.IsExtDepthClipControlSupported(),
|
||||
.support_scaled_attributes = !device.MustEmulateScaledFormats(),
|
||||
.support_multi_viewport = device.SupportsMultiViewport(),
|
||||
|
||||
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
|
||||
|
||||
|
@ -211,6 +211,13 @@ public:
|
||||
return;
|
||||
}
|
||||
PauseCounter();
|
||||
const auto driver_id = device.GetDriverID();
|
||||
if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) {
|
||||
pending_sync.clear();
|
||||
sync_values_stash.clear();
|
||||
return;
|
||||
}
|
||||
sync_values_stash.clear();
|
||||
sync_values_stash.emplace_back();
|
||||
std::vector<HostSyncValues>* sync_values = &sync_values_stash.back();
|
||||
@ -1378,6 +1385,12 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
|
||||
return true;
|
||||
}
|
||||
|
||||
auto driver_id = impl->device.GetDriverID();
|
||||
if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 2; i++) {
|
||||
is_null[i] = !is_in_ac[i] && check_value(objects[i]->address);
|
||||
}
|
||||
|
@ -82,7 +82,7 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
|
||||
}
|
||||
|
||||
if (y_negate) {
|
||||
y += height;
|
||||
y += conv(static_cast<f32>(regs.surface_clip.height));
|
||||
height = -height;
|
||||
}
|
||||
|
||||
@ -199,7 +199,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
|
||||
if (!pipeline) {
|
||||
return;
|
||||
}
|
||||
std::scoped_lock lock{LockCaches()};
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
// update engine as channel may be different.
|
||||
pipeline->SetEngine(maxwell3d, gpu_memory);
|
||||
pipeline->Configure(is_indexed);
|
||||
@ -621,7 +621,7 @@ void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) {
|
||||
}
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.CachedWriteMemory(addr, size);
|
||||
buffer_cache.WriteMemory(addr, size);
|
||||
}
|
||||
pipeline_cache.InvalidateRegion(addr, size);
|
||||
}
|
||||
@ -710,7 +710,6 @@ void RasterizerVulkan::TiledCacheBarrier() {
|
||||
}
|
||||
|
||||
void RasterizerVulkan::FlushCommands() {
|
||||
std::scoped_lock lock{LockCaches()};
|
||||
if (draw_counter == 0) {
|
||||
return;
|
||||
}
|
||||
@ -808,7 +807,6 @@ void RasterizerVulkan::FlushWork() {
|
||||
if ((++draw_counter & 7) != 7) {
|
||||
return;
|
||||
}
|
||||
std::scoped_lock lock{LockCaches()};
|
||||
if (draw_counter < DRAWS_TO_DISPATCH) {
|
||||
// Send recorded tasks to the worker thread
|
||||
scheduler.DispatchWork();
|
||||
@ -923,9 +921,13 @@ void RasterizerVulkan::UpdateDynamicStates() {
|
||||
}
|
||||
|
||||
void RasterizerVulkan::HandleTransformFeedback() {
|
||||
static std::once_flag warn_unsupported;
|
||||
|
||||
const auto& regs = maxwell3d->regs;
|
||||
if (!device.IsExtTransformFeedbackSupported()) {
|
||||
LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
|
||||
std::call_once(warn_unsupported, [&] {
|
||||
LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
|
||||
});
|
||||
return;
|
||||
}
|
||||
query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount,
|
||||
@ -1503,7 +1505,7 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs)
|
||||
void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) {
|
||||
CreateChannel(channel);
|
||||
{
|
||||
std::scoped_lock lock{LockCaches()};
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
texture_cache.CreateChannel(channel);
|
||||
buffer_cache.CreateChannel(channel);
|
||||
}
|
||||
@ -1516,7 +1518,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
|
||||
const s32 channel_id = channel.bind_id;
|
||||
BindToChannel(channel_id);
|
||||
{
|
||||
std::scoped_lock lock{LockCaches()};
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
texture_cache.BindToChannel(channel_id);
|
||||
buffer_cache.BindToChannel(channel_id);
|
||||
}
|
||||
@ -1529,7 +1531,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
|
||||
void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
|
||||
EraseChannel(channel_id);
|
||||
{
|
||||
std::scoped_lock lock{LockCaches()};
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
texture_cache.EraseChannel(channel_id);
|
||||
buffer_cache.EraseChannel(channel_id);
|
||||
}
|
||||
|
@ -133,10 +133,6 @@ public:
|
||||
|
||||
void ReleaseChannel(s32 channel_id) override;
|
||||
|
||||
std::scoped_lock<std::recursive_mutex, std::recursive_mutex> LockCaches() {
|
||||
return std::scoped_lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr size_t MAX_TEXTURES = 192;
|
||||
static constexpr size_t MAX_IMAGES = 48;
|
||||
|
@ -22,11 +22,12 @@ namespace Vulkan {
|
||||
|
||||
MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
|
||||
|
||||
void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
|
||||
void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
|
||||
vk::CommandBuffer upload_cmdbuf) {
|
||||
auto command = first;
|
||||
while (command != nullptr) {
|
||||
auto next = command->GetNext();
|
||||
command->Execute(cmdbuf);
|
||||
command->Execute(cmdbuf, upload_cmdbuf);
|
||||
command->~Command();
|
||||
command = next;
|
||||
}
|
||||
@ -180,7 +181,7 @@ void Scheduler::WorkerThread(std::stop_token stop_token) {
|
||||
// Perform the work, tracking whether the chunk was a submission
|
||||
// before executing.
|
||||
const bool has_submit = work->HasSubmit();
|
||||
work->ExecuteAll(current_cmdbuf);
|
||||
work->ExecuteAll(current_cmdbuf, current_upload_cmdbuf);
|
||||
|
||||
// If the chunk was a submission, reallocate the command buffer.
|
||||
if (has_submit) {
|
||||
@ -205,6 +206,13 @@ void Scheduler::AllocateWorkerCommandBuffer() {
|
||||
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
|
||||
.pInheritanceInfo = nullptr,
|
||||
});
|
||||
current_upload_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
|
||||
current_upload_cmdbuf.Begin({
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
|
||||
.pInheritanceInfo = nullptr,
|
||||
});
|
||||
}
|
||||
|
||||
u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
|
||||
@ -212,7 +220,17 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
|
||||
InvalidateState();
|
||||
|
||||
const u64 signal_value = master_semaphore->NextTick();
|
||||
Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
|
||||
RecordWithUploadBuffer([signal_semaphore, wait_semaphore, signal_value,
|
||||
this](vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) {
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
upload_cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
|
||||
upload_cmdbuf.End();
|
||||
cmdbuf.End();
|
||||
|
||||
if (on_submit) {
|
||||
@ -221,7 +239,7 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
|
||||
|
||||
std::scoped_lock lock{submit_mutex};
|
||||
switch (const VkResult result = master_semaphore->SubmitQueue(
|
||||
cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
|
||||
cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
|
||||
case VK_SUCCESS:
|
||||
break;
|
||||
case VK_ERROR_DEVICE_LOST:
|
||||
|
@ -80,7 +80,8 @@ public:
|
||||
|
||||
/// Send work to a separate thread.
|
||||
template <typename T>
|
||||
void Record(T&& command) {
|
||||
requires std::is_invocable_v<T, vk::CommandBuffer, vk::CommandBuffer>
|
||||
void RecordWithUploadBuffer(T&& command) {
|
||||
if (chunk->Record(command)) {
|
||||
return;
|
||||
}
|
||||
@ -88,6 +89,15 @@ public:
|
||||
(void)chunk->Record(command);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires std::is_invocable_v<T, vk::CommandBuffer>
|
||||
void Record(T&& c) {
|
||||
this->RecordWithUploadBuffer(
|
||||
[command = std::move(c)](vk::CommandBuffer cmdbuf, vk::CommandBuffer) {
|
||||
command(cmdbuf);
|
||||
});
|
||||
}
|
||||
|
||||
/// Returns the current command buffer tick.
|
||||
[[nodiscard]] u64 CurrentTick() const noexcept {
|
||||
return master_semaphore->CurrentTick();
|
||||
@ -119,7 +129,7 @@ private:
|
||||
public:
|
||||
virtual ~Command() = default;
|
||||
|
||||
virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
|
||||
virtual void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const = 0;
|
||||
|
||||
Command* GetNext() const {
|
||||
return next;
|
||||
@ -142,8 +152,8 @@ private:
|
||||
TypedCommand(TypedCommand&&) = delete;
|
||||
TypedCommand& operator=(TypedCommand&&) = delete;
|
||||
|
||||
void Execute(vk::CommandBuffer cmdbuf) const override {
|
||||
command(cmdbuf);
|
||||
/// Invokes `command` with the main command buffer followed by the upload command buffer.
void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const override {
    command(cmdbuf, upload_cmdbuf);
}
|
||||
|
||||
private:
|
||||
@ -152,7 +162,7 @@ private:
|
||||
|
||||
class CommandChunk final {
|
||||
public:
|
||||
void ExecuteAll(vk::CommandBuffer cmdbuf);
|
||||
void ExecuteAll(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf);
|
||||
|
||||
template <typename T>
|
||||
bool Record(T& command) {
|
||||
@ -228,6 +238,7 @@ private:
|
||||
VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr;
|
||||
|
||||
vk::CommandBuffer current_cmdbuf;
|
||||
vk::CommandBuffer current_upload_cmdbuf;
|
||||
|
||||
std::unique_ptr<CommandChunk> chunk;
|
||||
std::function<void()> on_submit;
|
||||
|
@ -672,7 +672,7 @@ void SMAA::UploadImages(Scheduler& scheduler) {
|
||||
UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent,
|
||||
VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes));
|
||||
|
||||
scheduler.Record([&](vk::CommandBuffer& cmdbuf) {
|
||||
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
|
||||
for (auto& images : m_dynamic_images) {
|
||||
for (size_t i = 0; i < MaxDynamicImage; i++) {
|
||||
ClearColorImage(cmdbuf, *images.images[i]);
|
||||
@ -707,7 +707,7 @@ VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_
|
||||
UpdateDescriptorSets(source_image_view, image_index);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([=, this](vk::CommandBuffer& cmdbuf) {
|
||||
scheduler.Record([=, this](vk::CommandBuffer cmdbuf) {
|
||||
TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL);
|
||||
TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL);
|
||||
BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer,
|
||||
|
@ -36,6 +36,10 @@ public:
|
||||
StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false);
|
||||
void FreeDeferred(StagingBufferRef& ref);
|
||||
|
||||
/// Returns the VkBuffer obtained by dereferencing `stream_buffer`.
[[nodiscard]] VkBuffer StreamBuf() const noexcept {
    return *stream_buffer;
}
|
||||
|
||||
void TickFrame();
|
||||
|
||||
private:
|
||||
|
@ -1785,8 +1785,22 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
|
||||
: VideoCommon::ImageViewBase{info, view_info, gpu_addr_},
|
||||
buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
|
||||
|
||||
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params)
|
||||
: VideoCommon::ImageViewBase{params} {}
|
||||
/// Constructs an image view from NullImageViewParams.
///
/// When the device reports null-descriptor support nothing beyond the base class and the
/// device pointer is initialized. Otherwise a placeholder image is created and one view is
/// made for each of the Shader::NUM_TEXTURE_TYPES slots of `image_views`.
///
/// @param runtime Supplies the device and the memory allocator used for the placeholder image.
/// @param params  Forwarded to the ImageViewBase constructor.
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params)
    : VideoCommon::ImageViewBase{params}, device{&runtime.device} {
    if (device->HasNullDescriptor()) {
        // No placeholder resources are created on this path.
        return;
    }

    // Fallback for devices without nullDescriptor: only the pixel format is overridden,
    // every other ImageInfo field keeps its default.
    ImageInfo placeholder_info{};
    placeholder_info.format = PixelFormat::A8B8G8R8_UNORM;

    // NOTE(review): MakeView presumably reads image_handle, so the handle is assigned
    // before any view is created — confirm before reordering.
    null_image = MakeImage(*device, runtime.memory_allocator, placeholder_info, {});
    image_handle = *null_image;

    for (u32 type = 0; type != Shader::NUM_TEXTURE_TYPES; ++type) {
        image_views[type] = MakeView(VK_FORMAT_A8B8G8R8_UNORM_PACK32, VK_IMAGE_ASPECT_COLOR_BIT);
    }
}
|
||||
|
||||
ImageView::~ImageView() = default;
|
||||
|
||||
|
@ -267,6 +267,7 @@ private:
|
||||
vk::ImageView depth_view;
|
||||
vk::ImageView stencil_view;
|
||||
vk::ImageView color_view;
|
||||
vk::Image null_image;
|
||||
VkImage image_handle = VK_NULL_HANDLE;
|
||||
VkImageView render_target = VK_NULL_HANDLE;
|
||||
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
|
Reference in New Issue
Block a user