mirror of
https://github.com/yuzu-emu/yuzu.git
synced 2025-07-06 15:47:52 -05:00
renderer_vulkan: Introduce separate cmd buffer for uploads
This commit is contained in:
committed by
Fernando Sahmkow
parent
767c4b5a99
commit
efc50485b8
@ -79,13 +79,13 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
|
||||
} // Anonymous namespace
|
||||
|
||||
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} {}
|
||||
|
||||
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
|
||||
VAddr cpu_addr_, u64 size_bytes_)
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
|
||||
device{&runtime.device}, buffer{
|
||||
CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} {
|
||||
device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())},
|
||||
tracker{SizeBytes()} {
|
||||
if (runtime.device.HasDebuggingToolAttached()) {
|
||||
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
|
||||
}
|
||||
@ -355,12 +355,31 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
|
||||
return device.CanReportMemoryUsage();
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept {
|
||||
for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) {
|
||||
it->ResetUsageTracking();
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::Finish() {
|
||||
scheduler.Finish();
|
||||
}
|
||||
|
||||
bool BufferCacheRuntime::CanReorderUpload(const Buffer& buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies) {
|
||||
if (Settings::values.disable_buffer_reorder) {
|
||||
return false;
|
||||
}
|
||||
const bool can_use_upload_cmdbuf =
|
||||
std::ranges::all_of(copies, [&](const VideoCommon::BufferCopy& copy) {
|
||||
return !buffer.IsRegionUsed(copy.dst_offset, copy.size);
|
||||
});
|
||||
return can_use_upload_cmdbuf;
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier,
|
||||
bool can_reorder_upload) {
|
||||
if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) {
|
||||
return;
|
||||
}
|
||||
@ -376,9 +395,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
|
||||
// Measuring a popular game, this number never exceeds the specified size once data is warmed up
|
||||
boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
|
||||
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
|
||||
if (src_buffer == staging_pool.StreamBuf() && can_reorder_upload) {
|
||||
scheduler.RecordWithUploadBuffer([src_buffer, dst_buffer, vk_copies](
|
||||
vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
|
||||
upload_cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
|
||||
if (barrier) {
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
#include "video_core/buffer_cache/buffer_cache_base.h"
|
||||
#include "video_core/buffer_cache/memory_tracker_base.h"
|
||||
#include "video_core/buffer_cache/usage_tracker.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
@ -34,6 +35,18 @@ public:
|
||||
return *buffer;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsRegionUsed(u64 offset, u64 size) const noexcept {
|
||||
return tracker.IsUsed(offset, size);
|
||||
}
|
||||
|
||||
void MarkUsage(u64 offset, u64 size) noexcept {
|
||||
tracker.Track(offset, size);
|
||||
}
|
||||
|
||||
void ResetUsageTracking() noexcept {
|
||||
tracker.Reset();
|
||||
}
|
||||
|
||||
operator VkBuffer() const noexcept {
|
||||
return *buffer;
|
||||
}
|
||||
@ -49,6 +62,7 @@ private:
|
||||
const Device* device{};
|
||||
vk::Buffer buffer;
|
||||
std::vector<BufferView> views;
|
||||
VideoCommon::UsageTracker tracker;
|
||||
};
|
||||
|
||||
class QuadArrayIndexBuffer;
|
||||
@ -67,6 +81,8 @@ public:
|
||||
ComputePassDescriptorQueue& compute_pass_descriptor_queue,
|
||||
DescriptorPool& descriptor_pool);
|
||||
|
||||
void TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept;
|
||||
|
||||
void Finish();
|
||||
|
||||
u64 GetDeviceLocalMemory() const;
|
||||
@ -79,12 +95,15 @@ public:
|
||||
|
||||
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
|
||||
|
||||
bool CanReorderUpload(const Buffer& buffer, std::span<const VideoCommon::BufferCopy> copies);
|
||||
|
||||
void FreeDeferredStagingBuffer(StagingBufferRef& ref);
|
||||
|
||||
void PreCopyBarrier();
|
||||
|
||||
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier,
|
||||
bool can_reorder_upload = false);
|
||||
|
||||
void PostCopyBarrier();
|
||||
|
||||
|
@ -100,12 +100,14 @@ void MasterSemaphore::Wait(u64 tick) {
|
||||
Refresh();
|
||||
}
|
||||
|
||||
VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick) {
|
||||
VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
|
||||
u64 host_tick) {
|
||||
if (semaphore) {
|
||||
return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
|
||||
return SubmitQueueTimeline(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore,
|
||||
host_tick);
|
||||
} else {
|
||||
return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
|
||||
return SubmitQueueFence(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, host_tick);
|
||||
}
|
||||
}
|
||||
|
||||
@ -115,6 +117,7 @@ static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
|
||||
};
|
||||
|
||||
VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
|
||||
vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick) {
|
||||
const VkSemaphore timeline_semaphore = *semaphore;
|
||||
@ -123,6 +126,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
|
||||
const std::array signal_values{host_tick, u64(0)};
|
||||
const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
|
||||
|
||||
const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
|
||||
|
||||
const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
|
||||
const VkTimelineSemaphoreSubmitInfo timeline_si{
|
||||
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
|
||||
@ -138,8 +143,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
|
||||
.waitSemaphoreCount = num_wait_semaphores,
|
||||
.pWaitSemaphores = &wait_semaphore,
|
||||
.pWaitDstStageMask = wait_stage_masks.data(),
|
||||
.commandBufferCount = 1,
|
||||
.pCommandBuffers = cmdbuf.address(),
|
||||
.commandBufferCount = static_cast<u32>(cmdbuffers.size()),
|
||||
.pCommandBuffers = cmdbuffers.data(),
|
||||
.signalSemaphoreCount = num_signal_semaphores,
|
||||
.pSignalSemaphores = signal_semaphores.data(),
|
||||
};
|
||||
@ -147,19 +152,23 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
|
||||
return device.GetGraphicsQueue().Submit(submit_info);
|
||||
}
|
||||
|
||||
VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick) {
|
||||
VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf,
|
||||
vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
|
||||
u64 host_tick) {
|
||||
const u32 num_signal_semaphores = signal_semaphore ? 1 : 0;
|
||||
const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
|
||||
|
||||
const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
|
||||
|
||||
const VkSubmitInfo submit_info{
|
||||
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
|
||||
.pNext = nullptr,
|
||||
.waitSemaphoreCount = num_wait_semaphores,
|
||||
.pWaitSemaphores = &wait_semaphore,
|
||||
.pWaitDstStageMask = wait_stage_masks.data(),
|
||||
.commandBufferCount = 1,
|
||||
.pCommandBuffers = cmdbuf.address(),
|
||||
.commandBufferCount = static_cast<u32>(cmdbuffers.size()),
|
||||
.pCommandBuffers = cmdbuffers.data(),
|
||||
.signalSemaphoreCount = num_signal_semaphores,
|
||||
.pSignalSemaphores = &signal_semaphore,
|
||||
};
|
||||
|
@ -52,14 +52,16 @@ public:
|
||||
void Wait(u64 tick);
|
||||
|
||||
/// Submits the device graphics queue, updating the tick as necessary
|
||||
VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick);
|
||||
VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, u64 host_tick);
|
||||
|
||||
private:
|
||||
VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick);
|
||||
VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
|
||||
VkSemaphore wait_semaphore, u64 host_tick);
|
||||
VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
|
||||
u64 host_tick);
|
||||
VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
|
||||
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
|
||||
u64 host_tick);
|
||||
|
||||
void WaitThread(std::stop_token token);
|
||||
|
||||
|
@ -22,11 +22,12 @@ namespace Vulkan {
|
||||
|
||||
MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
|
||||
|
||||
void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
|
||||
void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
|
||||
vk::CommandBuffer upload_cmdbuf) {
|
||||
auto command = first;
|
||||
while (command != nullptr) {
|
||||
auto next = command->GetNext();
|
||||
command->Execute(cmdbuf);
|
||||
command->Execute(cmdbuf, upload_cmdbuf);
|
||||
command->~Command();
|
||||
command = next;
|
||||
}
|
||||
@ -180,7 +181,7 @@ void Scheduler::WorkerThread(std::stop_token stop_token) {
|
||||
// Perform the work, tracking whether the chunk was a submission
|
||||
// before executing.
|
||||
const bool has_submit = work->HasSubmit();
|
||||
work->ExecuteAll(current_cmdbuf);
|
||||
work->ExecuteAll(current_cmdbuf, current_upload_cmdbuf);
|
||||
|
||||
// If the chunk was a submission, reallocate the command buffer.
|
||||
if (has_submit) {
|
||||
@ -205,6 +206,13 @@ void Scheduler::AllocateWorkerCommandBuffer() {
|
||||
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
|
||||
.pInheritanceInfo = nullptr,
|
||||
});
|
||||
current_upload_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
|
||||
current_upload_cmdbuf.Begin({
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
|
||||
.pInheritanceInfo = nullptr,
|
||||
});
|
||||
}
|
||||
|
||||
u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
|
||||
@ -212,7 +220,9 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
|
||||
InvalidateState();
|
||||
|
||||
const u64 signal_value = master_semaphore->NextTick();
|
||||
Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
|
||||
RecordWithUploadBuffer([signal_semaphore, wait_semaphore, signal_value,
|
||||
this](vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) {
|
||||
upload_cmdbuf.End();
|
||||
cmdbuf.End();
|
||||
|
||||
if (on_submit) {
|
||||
@ -221,7 +231,7 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
|
||||
|
||||
std::scoped_lock lock{submit_mutex};
|
||||
switch (const VkResult result = master_semaphore->SubmitQueue(
|
||||
cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
|
||||
cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
|
||||
case VK_SUCCESS:
|
||||
break;
|
||||
case VK_ERROR_DEVICE_LOST:
|
||||
|
@ -80,7 +80,8 @@ public:
|
||||
|
||||
/// Send work to a separate thread.
|
||||
template <typename T>
|
||||
void Record(T&& command) {
|
||||
requires std::is_invocable_v<T, vk::CommandBuffer, vk::CommandBuffer>
|
||||
void RecordWithUploadBuffer(T&& command) {
|
||||
if (chunk->Record(command)) {
|
||||
return;
|
||||
}
|
||||
@ -88,6 +89,15 @@ public:
|
||||
(void)chunk->Record(command);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires std::is_invocable_v<T, vk::CommandBuffer>
|
||||
void Record(T&& c) {
|
||||
this->RecordWithUploadBuffer(
|
||||
[command = std::move(c)](vk::CommandBuffer cmdbuf, vk::CommandBuffer) {
|
||||
command(cmdbuf);
|
||||
});
|
||||
}
|
||||
|
||||
/// Returns the current command buffer tick.
|
||||
[[nodiscard]] u64 CurrentTick() const noexcept {
|
||||
return master_semaphore->CurrentTick();
|
||||
@ -119,7 +129,7 @@ private:
|
||||
public:
|
||||
virtual ~Command() = default;
|
||||
|
||||
virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
|
||||
virtual void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const = 0;
|
||||
|
||||
Command* GetNext() const {
|
||||
return next;
|
||||
@ -142,8 +152,8 @@ private:
|
||||
TypedCommand(TypedCommand&&) = delete;
|
||||
TypedCommand& operator=(TypedCommand&&) = delete;
|
||||
|
||||
void Execute(vk::CommandBuffer cmdbuf) const override {
|
||||
command(cmdbuf);
|
||||
void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const override {
|
||||
command(cmdbuf, upload_cmdbuf);
|
||||
}
|
||||
|
||||
private:
|
||||
@ -152,7 +162,7 @@ private:
|
||||
|
||||
class CommandChunk final {
|
||||
public:
|
||||
void ExecuteAll(vk::CommandBuffer cmdbuf);
|
||||
void ExecuteAll(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf);
|
||||
|
||||
template <typename T>
|
||||
bool Record(T& command) {
|
||||
@ -228,6 +238,7 @@ private:
|
||||
VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr;
|
||||
|
||||
vk::CommandBuffer current_cmdbuf;
|
||||
vk::CommandBuffer current_upload_cmdbuf;
|
||||
|
||||
std::unique_ptr<CommandChunk> chunk;
|
||||
std::function<void()> on_submit;
|
||||
|
@ -672,7 +672,7 @@ void SMAA::UploadImages(Scheduler& scheduler) {
|
||||
UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent,
|
||||
VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes));
|
||||
|
||||
scheduler.Record([&](vk::CommandBuffer& cmdbuf) {
|
||||
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
|
||||
for (auto& images : m_dynamic_images) {
|
||||
for (size_t i = 0; i < MaxDynamicImage; i++) {
|
||||
ClearColorImage(cmdbuf, *images.images[i]);
|
||||
@ -707,7 +707,7 @@ VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_
|
||||
UpdateDescriptorSets(source_image_view, image_index);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([=, this](vk::CommandBuffer& cmdbuf) {
|
||||
scheduler.Record([=, this](vk::CommandBuffer cmdbuf) {
|
||||
TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL);
|
||||
TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL);
|
||||
BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer,
|
||||
|
@ -36,6 +36,10 @@ public:
|
||||
StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false);
|
||||
void FreeDeferred(StagingBufferRef& ref);
|
||||
|
||||
[[nodiscard]] VkBuffer StreamBuf() const noexcept {
|
||||
return *stream_buffer;
|
||||
}
|
||||
|
||||
void TickFrame();
|
||||
|
||||
private:
|
||||
|
Reference in New Issue
Block a user