mirror of
https://github.com/yuzu-emu/yuzu-android.git
synced 2025-06-20 01:47:54 -05:00
Remove memory allocations in some hot paths
This commit is contained in:
@ -207,7 +207,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
|
||||
if (has_new_downloads) {
|
||||
memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
|
||||
}
|
||||
tmp_buffer.resize(amount);
|
||||
tmp_buffer.resize_destructive(amount);
|
||||
cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
|
||||
cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount);
|
||||
return true;
|
||||
@ -1279,7 +1279,7 @@ template <class P>
|
||||
typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr,
|
||||
u32 wanted_size) {
|
||||
static constexpr int STREAM_LEAP_THRESHOLD = 16;
|
||||
std::vector<BufferId> overlap_ids;
|
||||
boost::container::small_vector<BufferId, 16> overlap_ids;
|
||||
VAddr begin = cpu_addr;
|
||||
VAddr end = cpu_addr + wanted_size;
|
||||
int stream_score = 0;
|
||||
|
@ -229,7 +229,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
|
||||
using OverlapCounter = boost::icl::split_interval_map<VAddr, int>;
|
||||
|
||||
struct OverlapResult {
|
||||
std::vector<BufferId> ids;
|
||||
boost::container::small_vector<BufferId, 16> ids;
|
||||
VAddr begin;
|
||||
VAddr end;
|
||||
bool has_stream_leap = false;
|
||||
@ -582,7 +582,7 @@ private:
|
||||
BufferId inline_buffer_id;
|
||||
|
||||
std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table;
|
||||
std::vector<u8> tmp_buffer;
|
||||
Common::ScratchBuffer<u8> tmp_buffer;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
@ -63,7 +63,6 @@ struct ChCommand {
|
||||
};
|
||||
|
||||
using ChCommandHeaderList = std::vector<ChCommandHeader>;
|
||||
using ChCommandList = std::vector<ChCommand>;
|
||||
|
||||
struct ThiRegisters {
|
||||
u32_le increment_syncpt{};
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <array>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <queue>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
@ -102,11 +103,12 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub
|
||||
struct CommandList final {
|
||||
CommandList() = default;
|
||||
explicit CommandList(std::size_t size) : command_lists(size) {}
|
||||
explicit CommandList(std::vector<CommandHeader>&& prefetch_command_list_)
|
||||
explicit CommandList(
|
||||
boost::container::small_vector<CommandHeader, 512>&& prefetch_command_list_)
|
||||
: prefetch_command_list{std::move(prefetch_command_list_)} {}
|
||||
|
||||
std::vector<CommandListHeader> command_lists;
|
||||
std::vector<CommandHeader> prefetch_command_list;
|
||||
boost::container::small_vector<CommandListHeader, 512> command_lists;
|
||||
boost::container::small_vector<CommandHeader, 512> prefetch_command_list;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -108,9 +108,11 @@ void MaxwellDMA::Launch() {
|
||||
if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
|
||||
ASSERT(regs.remap_const.component_size_minus_one == 3);
|
||||
accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
|
||||
std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
|
||||
read_buffer.resize_destructive(regs.line_length_in * sizeof(u32));
|
||||
std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in);
|
||||
std::ranges::fill(span, regs.remap_consta_value);
|
||||
memory_manager.WriteBlockUnsafe(regs.offset_out,
|
||||
reinterpret_cast<u8*>(tmp_buffer.data()),
|
||||
reinterpret_cast<u8*>(read_buffer.data()),
|
||||
regs.line_length_in * sizeof(u32));
|
||||
} else {
|
||||
memory_manager.FlushCaching();
|
||||
@ -126,32 +128,32 @@ void MaxwellDMA::Launch() {
|
||||
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
|
||||
UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
|
||||
UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
|
||||
std::vector<u8> tmp_buffer(16);
|
||||
read_buffer.resize_destructive(16);
|
||||
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
|
||||
memory_manager.ReadBlockUnsafe(
|
||||
convert_linear_2_blocklinear_addr(regs.offset_in + offset),
|
||||
tmp_buffer.data(), tmp_buffer.size());
|
||||
memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
|
||||
tmp_buffer.size());
|
||||
read_buffer.data(), read_buffer.size());
|
||||
memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(),
|
||||
read_buffer.size());
|
||||
}
|
||||
} else if (is_src_pitch && !is_dst_pitch) {
|
||||
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
|
||||
UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
|
||||
UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
|
||||
std::vector<u8> tmp_buffer(16);
|
||||
read_buffer.resize_destructive(16);
|
||||
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
|
||||
tmp_buffer.size());
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(),
|
||||
read_buffer.size());
|
||||
memory_manager.WriteBlockCached(
|
||||
convert_linear_2_blocklinear_addr(regs.offset_out + offset),
|
||||
tmp_buffer.data(), tmp_buffer.size());
|
||||
read_buffer.data(), read_buffer.size());
|
||||
}
|
||||
} else {
|
||||
if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
|
||||
std::vector<u8> tmp_buffer(regs.line_length_in);
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
||||
read_buffer.resize_destructive(regs.line_length_in);
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(),
|
||||
regs.line_length_in);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
|
||||
memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(),
|
||||
regs.line_length_in);
|
||||
}
|
||||
}
|
||||
@ -171,7 +173,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
|
||||
src_operand.address = regs.offset_in;
|
||||
|
||||
DMA::BufferOperand dst_operand;
|
||||
dst_operand.pitch = regs.pitch_out;
|
||||
u32 abs_pitch_out = std::abs(static_cast<s32>(regs.pitch_out));
|
||||
dst_operand.pitch = abs_pitch_out;
|
||||
dst_operand.width = regs.line_length_in;
|
||||
dst_operand.height = regs.line_count;
|
||||
dst_operand.address = regs.offset_out;
|
||||
@ -218,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
|
||||
const size_t src_size =
|
||||
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
|
||||
|
||||
const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
|
||||
const size_t dst_size = static_cast<size_t>(abs_pitch_out) * regs.line_count;
|
||||
read_buffer.resize_destructive(src_size);
|
||||
write_buffer.resize_destructive(dst_size);
|
||||
|
||||
@ -227,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
|
||||
|
||||
UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
|
||||
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
|
||||
regs.pitch_out);
|
||||
abs_pitch_out);
|
||||
|
||||
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <array>
|
||||
#include <bit>
|
||||
|
||||
#include "common/scratch_buffer.h"
|
||||
#include "common/settings.h"
|
||||
#include "video_core/host1x/codecs/h264.h"
|
||||
#include "video_core/host1x/host1x.h"
|
||||
@ -188,7 +189,8 @@ void H264BitWriter::WriteBit(bool state) {
|
||||
}
|
||||
|
||||
void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
|
||||
std::vector<u8> scan(count);
|
||||
static Common::ScratchBuffer<u8> scan{};
|
||||
scan.resize_destructive(count);
|
||||
if (count == 16) {
|
||||
std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
|
||||
} else {
|
||||
|
@ -587,7 +587,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
|
||||
|
||||
void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size,
|
||||
VideoCommon::CacheType which) {
|
||||
std::vector<u8> tmp_buffer(size);
|
||||
tmp_buffer.resize_destructive(size);
|
||||
ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which);
|
||||
|
||||
// The output block must be flushed in case it has data modified from the GPU.
|
||||
@ -670,9 +670,9 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
||||
GPUVAddr gpu_addr, std::size_t size) const {
|
||||
std::vector<std::pair<GPUVAddr, std::size_t>> result{};
|
||||
boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32>
|
||||
MemoryManager::GetSubmappedRange(GPUVAddr gpu_addr, std::size_t size) const {
|
||||
boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> result{};
|
||||
GetSubmappedRangeImpl<true>(gpu_addr, size, result);
|
||||
return result;
|
||||
}
|
||||
@ -680,8 +680,9 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
||||
template <bool is_gpu_address>
|
||||
void MemoryManager::GetSubmappedRangeImpl(
|
||||
GPUVAddr gpu_addr, std::size_t size,
|
||||
std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
|
||||
result) const {
|
||||
boost::container::small_vector<
|
||||
std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result)
|
||||
const {
|
||||
std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>
|
||||
last_segment{};
|
||||
std::optional<VAddr> old_page_addr{};
|
||||
|
@ -8,10 +8,12 @@
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/multi_level_page_table.h"
|
||||
#include "common/range_map.h"
|
||||
#include "common/scratch_buffer.h"
|
||||
#include "common/virtual_buffer.h"
|
||||
#include "video_core/cache_types.h"
|
||||
#include "video_core/pte_kind.h"
|
||||
@ -107,8 +109,8 @@ public:
|
||||
* if the region is continuous, a single pair will be returned. If it's unmapped, an empty
|
||||
* vector will be returned;
|
||||
*/
|
||||
std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
|
||||
std::size_t size) const;
|
||||
boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange(
|
||||
GPUVAddr gpu_addr, std::size_t size) const;
|
||||
|
||||
GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
|
||||
PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
|
||||
@ -165,7 +167,8 @@ private:
|
||||
template <bool is_gpu_address>
|
||||
void GetSubmappedRangeImpl(
|
||||
GPUVAddr gpu_addr, std::size_t size,
|
||||
std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
|
||||
boost::container::small_vector<
|
||||
std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>&
|
||||
result) const;
|
||||
|
||||
Core::System& system;
|
||||
@ -215,8 +218,8 @@ private:
|
||||
Common::VirtualBuffer<u32> big_page_table_cpu;
|
||||
|
||||
std::vector<u64> big_page_continuous;
|
||||
std::vector<std::pair<VAddr, std::size_t>> page_stash{};
|
||||
std::vector<std::pair<VAddr, std::size_t>> page_stash2{};
|
||||
boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash{};
|
||||
boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash2{};
|
||||
|
||||
mutable std::mutex guard;
|
||||
|
||||
@ -226,6 +229,8 @@ private:
|
||||
std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
|
||||
|
||||
static std::atomic<size_t> unique_identifier_generator;
|
||||
|
||||
Common::ScratchBuffer<u8> tmp_buffer;
|
||||
};
|
||||
|
||||
} // namespace Tegra
|
||||
|
@ -85,7 +85,9 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
|
||||
case Shader::Stage::VertexB:
|
||||
case Shader::Stage::Geometry:
|
||||
if (!use_assembly_shaders && key.xfb_enabled != 0) {
|
||||
info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
|
||||
auto [varyings, count] = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
|
||||
info.xfb_varyings = varyings;
|
||||
info.xfb_count = count;
|
||||
}
|
||||
break;
|
||||
case Shader::Stage::TessellationEval:
|
||||
|
@ -361,7 +361,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
// Measuring a popular game, this number never exceeds the specified size once data is warmed up
|
||||
boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
|
||||
boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
|
||||
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
|
||||
|
@ -167,7 +167,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
|
||||
info.fixed_state_point_size = point_size;
|
||||
}
|
||||
if (key.state.xfb_enabled) {
|
||||
info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
|
||||
auto [varyings, count] =
|
||||
VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
|
||||
info.xfb_varyings = varyings;
|
||||
info.xfb_count = count;
|
||||
}
|
||||
info.convert_depth_mode = gl_ndc;
|
||||
}
|
||||
@ -214,7 +217,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
|
||||
info.fixed_state_point_size = point_size;
|
||||
}
|
||||
if (key.state.xfb_enabled != 0) {
|
||||
info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
|
||||
auto [varyings, count] =
|
||||
VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
|
||||
info.xfb_varyings = varyings;
|
||||
info.xfb_count = count;
|
||||
}
|
||||
info.convert_depth_mode = gl_ndc;
|
||||
break;
|
||||
|
@ -330,9 +330,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||
};
|
||||
}
|
||||
|
||||
[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
|
||||
std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
|
||||
std::vector<VkBufferCopy> result(copies.size());
|
||||
[[maybe_unused]] [[nodiscard]] boost::container::small_vector<VkBufferCopy, 16>
|
||||
TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
|
||||
boost::container::small_vector<VkBufferCopy, 16> result(copies.size());
|
||||
std::ranges::transform(
|
||||
copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) {
|
||||
return VkBufferCopy{
|
||||
@ -344,7 +344,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||
return result;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies(
|
||||
[[nodiscard]] boost::container::small_vector<VkBufferImageCopy, 16> TransformBufferImageCopies(
|
||||
std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) {
|
||||
struct Maker {
|
||||
VkBufferImageCopy operator()(const BufferImageCopy& copy) const {
|
||||
@ -377,14 +377,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||
VkImageAspectFlags aspect_mask;
|
||||
};
|
||||
if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
|
||||
std::vector<VkBufferImageCopy> result(copies.size() * 2);
|
||||
boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size() * 2);
|
||||
std::ranges::transform(copies, result.begin(),
|
||||
Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT});
|
||||
std::ranges::transform(copies, result.begin() + copies.size(),
|
||||
Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT});
|
||||
return result;
|
||||
} else {
|
||||
std::vector<VkBufferImageCopy> result(copies.size());
|
||||
boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size());
|
||||
std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask});
|
||||
return result;
|
||||
}
|
||||
@ -867,8 +867,8 @@ void TextureCacheRuntime::BarrierFeedbackLoop() {
|
||||
|
||||
void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
|
||||
std::span<const VideoCommon::ImageCopy> copies) {
|
||||
std::vector<VkBufferImageCopy> vk_in_copies(copies.size());
|
||||
std::vector<VkBufferImageCopy> vk_out_copies(copies.size());
|
||||
boost::container::small_vector<VkBufferImageCopy, 16> vk_in_copies(copies.size());
|
||||
boost::container::small_vector<VkBufferImageCopy, 16> vk_out_copies(copies.size());
|
||||
const VkImageAspectFlags src_aspect_mask = src.AspectMask();
|
||||
const VkImageAspectFlags dst_aspect_mask = dst.AspectMask();
|
||||
|
||||
@ -1157,7 +1157,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
|
||||
|
||||
void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
|
||||
std::span<const VideoCommon::ImageCopy> copies) {
|
||||
std::vector<VkImageCopy> vk_copies(copies.size());
|
||||
boost::container::small_vector<VkImageCopy, 16> vk_copies(copies.size());
|
||||
const VkImageAspectFlags aspect_mask = dst.AspectMask();
|
||||
ASSERT(aspect_mask == src.AspectMask());
|
||||
|
||||
@ -1332,7 +1332,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
|
||||
ScaleDown(true);
|
||||
}
|
||||
scheduler->RequestOutsideRenderPassOperationContext();
|
||||
std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
|
||||
auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
|
||||
const VkBuffer src_buffer = buffer;
|
||||
const VkImage vk_image = *original_image;
|
||||
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
|
||||
@ -1367,8 +1367,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceS
|
||||
if (is_rescaled) {
|
||||
ScaleDown();
|
||||
}
|
||||
boost::container::small_vector<VkBuffer, 1> buffers_vector{};
|
||||
boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies;
|
||||
boost::container::small_vector<VkBuffer, 8> buffers_vector{};
|
||||
boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
|
||||
vk_copies;
|
||||
for (size_t index = 0; index < buffers_span.size(); index++) {
|
||||
buffers_vector.emplace_back(buffers_span[index]);
|
||||
vk_copies.emplace_back(
|
||||
@ -1858,7 +1859,7 @@ Framebuffer::~Framebuffer() = default;
|
||||
void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
|
||||
std::span<ImageView*, NUM_RT> color_buffers,
|
||||
ImageView* depth_buffer, bool is_rescaled) {
|
||||
std::vector<VkImageView> attachments;
|
||||
boost::container::small_vector<VkImageView, NUM_RT + 1> attachments;
|
||||
RenderPassKey renderpass_key{};
|
||||
s32 num_layers = 1;
|
||||
|
||||
|
@ -151,11 +151,9 @@ void ShaderCache::RemovePendingShaders() {
|
||||
marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
|
||||
marked_for_removal.end());
|
||||
|
||||
std::vector<ShaderInfo*> removed_shaders;
|
||||
removed_shaders.reserve(marked_for_removal.size());
|
||||
boost::container::small_vector<ShaderInfo*, 16> removed_shaders;
|
||||
|
||||
std::scoped_lock lock{lookup_mutex};
|
||||
|
||||
for (Entry* const entry : marked_for_removal) {
|
||||
removed_shaders.push_back(entry->data);
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <array>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
@ -108,8 +109,8 @@ struct ImageBase {
|
||||
std::vector<ImageViewInfo> image_view_infos;
|
||||
std::vector<ImageViewId> image_view_ids;
|
||||
|
||||
std::vector<u32> slice_offsets;
|
||||
std::vector<SubresourceBase> slice_subresources;
|
||||
boost::container::small_vector<u32, 16> slice_offsets;
|
||||
boost::container::small_vector<SubresourceBase, 16> slice_subresources;
|
||||
|
||||
std::vector<AliasedImage> aliased_images;
|
||||
std::vector<ImageId> overlapping_images;
|
||||
|
@ -526,7 +526,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
|
||||
std::vector<ImageId> images;
|
||||
boost::container::small_vector<ImageId, 16> images;
|
||||
ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
|
||||
if (!image.IsSafeDownload()) {
|
||||
return;
|
||||
@ -579,7 +579,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
|
||||
std::vector<ImageId> deleted_images;
|
||||
boost::container::small_vector<ImageId, 16> deleted_images;
|
||||
ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
|
||||
for (const ImageId id : deleted_images) {
|
||||
Image& image = slot_images[id];
|
||||
@ -593,7 +593,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) {
|
||||
std::vector<ImageId> deleted_images;
|
||||
boost::container::small_vector<ImageId, 16> deleted_images;
|
||||
ForEachImageInRegionGPU(as_id, gpu_addr, size,
|
||||
[&](ImageId id, Image&) { deleted_images.push_back(id); });
|
||||
for (const ImageId id : deleted_images) {
|
||||
@ -1101,7 +1101,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
||||
const bool native_bgr = runtime.HasNativeBgr();
|
||||
const bool flexible_formats = True(options & RelaxedOptions::Format);
|
||||
ImageId image_id{};
|
||||
boost::container::small_vector<ImageId, 1> image_ids;
|
||||
boost::container::small_vector<ImageId, 8> image_ids;
|
||||
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
|
||||
if (True(existing_image.flags & ImageFlagBits::Remapped)) {
|
||||
return false;
|
||||
@ -1622,7 +1622,7 @@ ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr)
|
||||
}
|
||||
}
|
||||
ImageId image_id{};
|
||||
boost::container::small_vector<ImageId, 1> image_ids;
|
||||
boost::container::small_vector<ImageId, 8> image_ids;
|
||||
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
|
||||
if (True(existing_image.flags & ImageFlagBits::Remapped)) {
|
||||
return false;
|
||||
@ -1942,7 +1942,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
|
||||
image.map_view_id = map_id;
|
||||
return;
|
||||
}
|
||||
std::vector<ImageViewId> sparse_maps{};
|
||||
boost::container::small_vector<ImageViewId, 16> sparse_maps;
|
||||
ForEachSparseSegment(
|
||||
image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
|
||||
auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
|
||||
@ -2217,7 +2217,7 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
|
||||
boost::container::small_vector<const AliasedImage*, 1> aliased_images;
|
||||
boost::container::small_vector<const AliasedImage*, 8> aliased_images;
|
||||
Image& image = slot_images[image_id];
|
||||
bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
|
||||
bool any_modified = True(image.flags & ImageFlagBits::GpuModified);
|
||||
|
@ -56,7 +56,7 @@ struct ImageViewInOut {
|
||||
struct AsyncDecodeContext {
|
||||
ImageId image_id;
|
||||
Common::ScratchBuffer<u8> decoded_data;
|
||||
std::vector<BufferImageCopy> copies;
|
||||
boost::container::small_vector<BufferImageCopy, 16> copies;
|
||||
std::mutex mutex;
|
||||
std::atomic_bool complete;
|
||||
};
|
||||
@ -429,7 +429,7 @@ private:
|
||||
|
||||
std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
|
||||
std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
|
||||
std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
|
||||
std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views;
|
||||
|
||||
VAddr virtual_invalid_space{};
|
||||
|
||||
|
@ -329,13 +329,13 @@ template <u32 GOB_EXTENT>
|
||||
|
||||
[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D(
|
||||
const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
|
||||
const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info);
|
||||
const auto slice_offsets = CalculateSliceOffsets(new_info);
|
||||
const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr);
|
||||
const auto it = std::ranges::find(slice_offsets, diff);
|
||||
if (it == slice_offsets.end()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
const std::vector subresources = CalculateSliceSubresources(new_info);
|
||||
const auto subresources = CalculateSliceSubresources(new_info);
|
||||
const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)];
|
||||
const ImageInfo& info = overlap.info;
|
||||
if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
|
||||
@ -655,9 +655,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
|
||||
return sizes;
|
||||
}
|
||||
|
||||
std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
|
||||
boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info) {
|
||||
ASSERT(info.type == ImageType::e3D);
|
||||
std::vector<u32> offsets;
|
||||
boost::container::small_vector<u32, 16> offsets;
|
||||
offsets.reserve(NumSlices(info));
|
||||
|
||||
const LevelInfo level_info = MakeLevelInfo(info);
|
||||
@ -679,9 +679,10 @@ std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
|
||||
return offsets;
|
||||
}
|
||||
|
||||
std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) {
|
||||
boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources(
|
||||
const ImageInfo& info) {
|
||||
ASSERT(info.type == ImageType::e3D);
|
||||
std::vector<SubresourceBase> subresources;
|
||||
boost::container::small_vector<SubresourceBase, 16> subresources;
|
||||
subresources.reserve(NumSlices(info));
|
||||
for (s32 level = 0; level < info.resources.levels; ++level) {
|
||||
const s32 depth = AdjustMipSize(info.size.depth, level);
|
||||
@ -723,8 +724,10 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
|
||||
SubresourceBase base, u32 up_scale, u32 down_shift) {
|
||||
boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(const ImageInfo& dst,
|
||||
const ImageInfo& src,
|
||||
SubresourceBase base,
|
||||
u32 up_scale, u32 down_shift) {
|
||||
ASSERT(dst.resources.levels >= src.resources.levels);
|
||||
|
||||
const bool is_dst_3d = dst.type == ImageType::e3D;
|
||||
@ -733,7 +736,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
|
||||
ASSERT(src.resources.levels == 1);
|
||||
}
|
||||
const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D};
|
||||
std::vector<ImageCopy> copies;
|
||||
boost::container::small_vector<ImageCopy, 16> copies;
|
||||
copies.reserve(src.resources.levels);
|
||||
for (s32 level = 0; level < src.resources.levels; ++level) {
|
||||
ImageCopy& copy = copies.emplace_back();
|
||||
@ -770,9 +773,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
|
||||
return copies;
|
||||
}
|
||||
|
||||
std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale,
|
||||
u32 down_shift) {
|
||||
std::vector<ImageCopy> copies;
|
||||
boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(const ImageInfo& src,
|
||||
u32 up_scale,
|
||||
u32 down_shift) {
|
||||
boost::container::small_vector<ImageCopy, 16> copies;
|
||||
copies.reserve(src.resources.levels);
|
||||
const bool is_3d = src.type == ImageType::e3D;
|
||||
for (s32 level = 0; level < src.resources.levels; ++level) {
|
||||
@ -824,9 +828,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config
|
||||
return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
|
||||
}
|
||||
|
||||
std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
|
||||
const ImageInfo& info, std::span<const u8> input,
|
||||
std::span<u8> output) {
|
||||
boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
|
||||
GPUVAddr gpu_addr,
|
||||
const ImageInfo& info,
|
||||
std::span<const u8> input,
|
||||
std::span<u8> output) {
|
||||
const size_t guest_size_bytes = input.size_bytes();
|
||||
const u32 bpp_log2 = BytesPerBlockLog2(info.format);
|
||||
const Extent3D size = info.size;
|
||||
@ -861,7 +867,7 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP
|
||||
info.tile_width_spacing);
|
||||
size_t guest_offset = 0;
|
||||
u32 host_offset = 0;
|
||||
std::vector<BufferImageCopy> copies(num_levels);
|
||||
boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
|
||||
|
||||
for (s32 level = 0; level < num_levels; ++level) {
|
||||
const Extent3D level_size = AdjustMipSize(size, level);
|
||||
@ -978,7 +984,7 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
|
||||
boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) {
|
||||
const Extent3D size = info.size;
|
||||
const u32 bytes_per_block = BytesPerBlock(info.format);
|
||||
if (info.type == ImageType::Linear) {
|
||||
@ -1006,7 +1012,7 @@ std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
|
||||
|
||||
u32 host_offset = 0;
|
||||
|
||||
std::vector<BufferImageCopy> copies(num_levels);
|
||||
boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
|
||||
for (s32 level = 0; level < num_levels; ++level) {
|
||||
const Extent3D level_size = AdjustMipSize(size, level);
|
||||
const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
|
||||
@ -1042,10 +1048,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
|
||||
return AdjustMipBlockSize(num_tiles, level_info.block, level);
|
||||
}
|
||||
|
||||
std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
|
||||
boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) {
|
||||
const Extent2D tile_size = DefaultBlockSize(info.format);
|
||||
if (info.type == ImageType::Linear) {
|
||||
return std::vector{SwizzleParameters{
|
||||
return {SwizzleParameters{
|
||||
.num_tiles = AdjustTileSize(info.size, tile_size),
|
||||
.block = {},
|
||||
.buffer_offset = 0,
|
||||
@ -1057,7 +1063,7 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
|
||||
const s32 num_levels = info.resources.levels;
|
||||
|
||||
u32 guest_offset = 0;
|
||||
std::vector<SwizzleParameters> params(num_levels);
|
||||
boost::container::small_vector<SwizzleParameters, 16> params(num_levels);
|
||||
for (s32 level = 0; level < num_levels; ++level) {
|
||||
const Extent3D level_size = AdjustMipSize(size, level);
|
||||
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/scratch_buffer.h"
|
||||
@ -40,9 +41,10 @@ struct OverlapResult {
|
||||
|
||||
[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
|
||||
|
||||
[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
|
||||
[[nodiscard]] boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info);
|
||||
|
||||
[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
|
||||
[[nodiscard]] boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources(
|
||||
const ImageInfo& info);
|
||||
|
||||
[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level);
|
||||
|
||||
@ -51,21 +53,18 @@ struct OverlapResult {
|
||||
|
||||
[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept;
|
||||
|
||||
[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst,
|
||||
const ImageInfo& src,
|
||||
SubresourceBase base, u32 up_scale = 1,
|
||||
u32 down_shift = 0);
|
||||
[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(
|
||||
const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale = 1,
|
||||
u32 down_shift = 0);
|
||||
|
||||
[[nodiscard]] std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src,
|
||||
u32 up_scale = 1,
|
||||
u32 down_shift = 0);
|
||||
[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(
|
||||
const ImageInfo& src, u32 up_scale = 1, u32 down_shift = 0);
|
||||
|
||||
[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
|
||||
|
||||
[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
|
||||
GPUVAddr gpu_addr, const ImageInfo& info,
|
||||
std::span<const u8> input,
|
||||
std::span<u8> output);
|
||||
[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(
|
||||
Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
|
||||
std::span<const u8> input, std::span<u8> output);
|
||||
|
||||
[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
|
||||
const ImageBase& image, std::span<u8> output);
|
||||
@ -73,13 +72,15 @@ struct OverlapResult {
|
||||
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
|
||||
std::span<BufferImageCopy> copies);
|
||||
|
||||
[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info);
|
||||
[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(
|
||||
const ImageInfo& info);
|
||||
|
||||
[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
|
||||
|
||||
[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level);
|
||||
|
||||
[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info);
|
||||
[[nodiscard]] boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(
|
||||
const ImageInfo& info);
|
||||
|
||||
void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
|
||||
std::span<const BufferImageCopy> copies, std::span<const u8> memory,
|
||||
|
@ -13,7 +13,7 @@
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
|
||||
std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings(
|
||||
const TransformFeedbackState& state) {
|
||||
static constexpr std::array VECTORS{
|
||||
28U, // gl_Position
|
||||
@ -62,7 +62,8 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
|
||||
216U, // gl_TexCoord[6]
|
||||
220U, // gl_TexCoord[7]
|
||||
};
|
||||
std::vector<Shader::TransformFeedbackVarying> xfb(256);
|
||||
std::array<Shader::TransformFeedbackVarying, 256> xfb{};
|
||||
u32 count{0};
|
||||
for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) {
|
||||
const auto& locations = state.varyings[buffer];
|
||||
const auto& layout = state.layouts[buffer];
|
||||
@ -103,11 +104,12 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
|
||||
}
|
||||
}
|
||||
xfb[attribute] = varying;
|
||||
count = std::max(count, attribute);
|
||||
highest = std::max(highest, (base_offset + varying.components) * 4);
|
||||
}
|
||||
UNIMPLEMENTED_IF(highest != layout.stride);
|
||||
}
|
||||
return xfb;
|
||||
return {xfb, count + 1};
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
@ -24,7 +24,7 @@ struct TransformFeedbackState {
|
||||
varyings;
|
||||
};
|
||||
|
||||
std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
|
||||
std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings(
|
||||
const TransformFeedbackState& state);
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
@ -316,6 +316,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
|
||||
std::vector<const char*> ExtensionListForVulkan(
|
||||
const std::set<std::string, std::less<>>& extensions) {
|
||||
std::vector<const char*> output;
|
||||
output.reserve(extensions.size());
|
||||
for (const auto& extension : extensions) {
|
||||
output.push_back(extension.c_str());
|
||||
}
|
||||
|
Reference in New Issue
Block a user